@article {pmid39833199, year = {2025}, author = {Sivarajan, V and Ganesh, AV and Subramani, P and Ganesapandi, P and Sivanandan, RN and Prakash, S and Manikandan, N and Dharmarajan, A and Arfuso, F and Warrier, S and Raj, M and Perumal, K}, title = {Prevalence and genomic insights of carbapenem resistant and ESBL producing Multidrug resistant Escherichia coli in urinary tract infections.}, journal = {Scientific reports}, volume = {15}, number = {1}, pages = {2541}, pmid = {39833199}, issn = {2045-2322}, mesh = {*Urinary Tract Infections/microbiology/drug therapy ; Humans ; *Drug Resistance, Multiple, Bacterial/genetics ; *Escherichia coli/genetics/drug effects/isolation & purification ; *Escherichia coli Infections/microbiology/drug therapy/epidemiology ; *beta-Lactamases/genetics ; *Carbapenems/pharmacology ; Prevalence ; *Anti-Bacterial Agents/pharmacology ; Microbial Sensitivity Tests ; Whole Genome Sequencing/methods ; Female ; Male ; Middle Aged ; Adult ; Genome, Bacterial ; Aged ; Genomics/methods ; Phylogeny ; Young Adult ; Adolescent ; Child ; }, abstract = {Urinary tract infections are a common condition affecting people globally, with multidrug-resistant (MDR) Escherichia coli (E. coli) being a major causative agent. Antimicrobial susceptibility profiling was performed using the VITEK 2 automated system for 1254 E. coli isolates, revealing that 831(66.2%) isolates were determined as MDR E. coli. A significant resistance pattern was observed for nalidixic acid (86.04%), ampicillin (74.16%), ticarcillin (70.73%), cefalotin (65.23%), cefixime (62.68%), ciprofloxacin (55.18%), ceftriaxone (53.75%), amoxicillin-clavulanic acid (22.81%), ertapenem (7.18%), and fosfomycin (2.23%). Whole Genome Sequencing of Carbapenem-resistant E. coli (CREC)-CREC 3 (ST405), CREC 4 (ST448), and CREC 5 (ST167) was performed to determine genomic characteristics. CREC 3, CREC 4, and CREC 5 belong to the phylogroup D, B1, and A, respectively. 
The NDM-5 gene was common in all three isolates, with CTX-M-15 being present in CREC 3 and CREC 4. Virulence factors of CREC 3 (fliC, shuA), CREC 4 (spaS), CREC 5 (iucA, papH, papG, iucB, yigF), and plasmids (IncFIA, IncFIB) were identified to be significant. The use of pangenome analysis enhances our understanding of resistance traits of isolates ST167, ST405, and ST448, offering valuable insights into comparative genomics of uropathogenic MDR E. coli.}, } @article {pmid39829883, year = {2025}, author = {Beebe, MA and Paredes-Sabja, D and Kociolek, LK and Rodríguez, C and Sorg, JA}, title = {Phenotypic analysis of various Clostridioides difficile ribotypes reveals consistency among core processes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2025.01.10.632434}, pmid = {39829883}, issn = {2692-8205}, abstract = {UNLABELLED: Clostridioides difficile infections (CDI) cause almost 300,000 hospitalizations per year of which ∼15-30% are the result of recurring infections. The prevalence and persistence of CDI in hospital settings has resulted in an extensive collection of C. difficile clinical isolates and their classification, typically by ribotype. While much of the current literature focuses on one or two prominent ribotypes (e.g., RT027), recent years have seen several other ribotypes dominate the clinical landscape (e.g., RT106 and RT078). Some ribotypes are associated with severe disease and / or increased recurrence rates, but why are certain ribotypes more prominent or harmful than others remains unknown. Because C. difficile has a large, open pan-genome, this observed relationship between ribotype and clinical outcome could be a result of the genetic diversity of C. difficile. Thus, we hypothesize that core biological processes of C. difficile are conserved across ribotypes / clades. 
We tested this hypothesis by observing the growth kinetics, sporulation, germination, bile acid sensitivity, bile salt hydrolase activity, and surface motility of fifteen strains belonging to various ribotypes spanning each known C. difficile clade. In viewing these phenotypes across each strain, we see that core phenotypes (growth, germination, sporulation, and resistance to bile salt toxicity) are remarkably consistent across clades / ribotypes. This suggests that variations observed in the clinical setting may be due to unidentified factors in the accessory genome or due to unknown host-factors.

IMPORTANCE: C. difficile infections impact thousands of individuals every year many of whom experience recurring infections. Clinical studies have reported an unexplained correlation between some clades / ribotypes of C. difficile and disease severity / recurrence. Here, we demonstrate that C. difficile strains across the major clades / ribotypes are consistent in their core phenotypes. This suggests that such phenotypes are not responsible for variations in disease severity / recurrence and are ideal targets for the development of therapeutics meant to treat C. difficile related infections.}, } @article {pmid39829834, year = {2025}, author = {Frias-De-Diego, A and Jara, M and Lanzas, C}, title = {Influence of Sequencing Technology on Pangenome-level Analysis and Detection of Antimicrobial Resistance Genes in ESKAPE Pathogens.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2025.01.08.631980}, pmid = {39829834}, issn = {2692-8205}, abstract = {UNLABELLED: As sequencing costs decrease, short-read and long-read technologies are indispensable tools for uncovering the genetic drivers behind bacterial pathogen resistance. This study explores the differences between the use of short-read (Illumina) and long-read (Oxford Nanopore Technologies, ONT) sequencing in detecting antimicrobial resistance (AMR) genes in ESKAPE pathogens (Enterococcus faecium, Staphylococcus aureus, Klebsiella pneumoniae, Acinetobacter baumannii, Pseudomonas aeruginosa, and Enterobacter cloacae). Utilizing a dataset of 1,385 whole genome sequences and applying commonly used bioinformatic methods in bacterial genomics, we assessed the differences in genomic completeness, pangenome structure, and AMR gene and point mutation identification. Illumina presented higher genome completeness, while ONT identified a broader pangenome. 
Hybrid assembly outperformed both Illumina and ONT at identifying key AMR genetic determinants, presented results closer to Illumina's completeness, and revealed ONT-like pangenomic content. Notably, Illumina consistently detected more AMR-related point mutations than its counterparts. This highlights the importance of method selection based on research goals. Differences were also observed for specific gene classes and bacterial species, underscoring the need for a nuanced understanding of technology limitations. Overall, this study reveals the strengths and limitations of each approach, advocating for the use of Illumina for common AMR analysis; ONT for studying complex genomes and novel species, and hybrid assembly for a more comprehensive characterization, leveraging the benefits of both technologies.

IMPACT STATEMENT: This study provides a comprehensive comparison of short-read (Illumina) and long-read (Oxford Nanopore Technologies, ONT) sequencing technologies in the context of antimicrobial resistance (AMR) detection in ESKAPE pathogens. By analyzing a large dataset of 1,385 whole genome sequences, the research offers valuable insights into the strengths and limitations of each approach, as well as the benefits of the novel approach of hybrid assembly. These findings have broad utility across microbiology, genomics, and infectious disease research. In particular, they apply to the work of researchers and clinicians dealing with AMR surveillance, investigation into outbreaks, and bacterial genome analysis. Given the nuance with which technological differences in genomic completeness, pangenome structure, and AMR determinant detection have been explored in this study, it is a good basis for informed method selection for future research. While the output represents an incremental advance, its significance lies in its practical implications. It thus enables researchers to take more reasonable decisions in designing genomic studies of bacterial pathogens by showing the complementarity of various sequencing approaches and their specific strengths. This could lead to more accurate and comprehensive detection of AMR, which would contribute ultimately to improved antibiotic stewardship and public health strategies.

DATA SUMMARY: The authors confirm all supporting data, code and protocols have been provided within the article or through supplementary data files.

REPOSITORIES: All the sequences used for this study are publicly accessible from GenBank, and their individual IDs are disclosed in Supplementary Table 1.}, } @article {pmid39828703, year = {2025}, author = {Zhao, Q and Yin, Z and Hou, Z}, title = {Near telomere-to-telomere genome assemblies of Silkie Gallus gallus and Mallard Anas platyrhynchos restored the structure of chromosomes and ``missing'' genes in birds.}, journal = {Journal of animal science and biotechnology}, volume = {16}, number = {1}, pages = {9}, pmid = {39828703}, issn = {1674-9782}, abstract = {BACKGROUND: Chickens and ducks are vital sources of animal protein for humans. Recent pangenome studies suggest that a single genome is insufficient to represent the genetic information of a species, highlighting the need for more comprehensive genomes. The bird genome has more than tens of microchromosomes, but comparative genomics, annotations, and the discovery of variations are hindered by inadequate telomere-to-telomere level assemblies. We aim to complete the chicken and duck genomes, recover missing genes, and reveal common and unique chromosomal features between birds.

RESULTS: The near telomere-to-telomere genomes of Silkie Gallus gallus and Mallard Anas platyrhynchos were successfully assembled via multiple high-coverage complementary technologies, with quality values of 36.65 and 44.17 for Silkie and Mallard, respectively; and BUSCO scores of 96.55% and 96.97% for Silkie and Mallard, respectively; the mapping rates reached over 99.52% for both assembled genomes, these evaluation results ensured high completeness and accuracy. We successfully annotated 20,253 and 19,621 protein-coding genes for Silkie and Mallard, respectively, and assembled gap-free sex chromosomes in Mallard for the first time. Comparative analysis revealed that microchromosomes differ from macrochromosomes in terms of GC content, repetitive sequence abundance, gene density, and levels of 5mC methylation. Different types of arrangements of centromeric repeat sequence centromeres exist in both Silkie and the Mallard genomes, with Mallard centromeres being invaded by CR1. The highly heterochromatic W chromosome, which serves as a refuge for ERVs, contains disproportionately long ERVs. Both Silkie and the Mallard genomes presented relatively high 5mC methylation levels on sex chromosomes and microchromosomes, and the telomeres and centromeres presented significantly higher 5mC methylation levels than the whole genome. Finally, we recovered 325 missing genes via our new genomes and annotated TNFA in Mallard for the first time, revealing conserved protein structures and tissue-specific expression.

CONCLUSIONS: The near telomere-to-telomere assemblies in Mallard and Silkie, with the first gap-free sex chromosomes in ducks, significantly enhanced our understanding of genetic structures in birds, specifically highlighting the distinctive chromosome features between the chicken and duck genomes. This foundational work also provides a series of newly identified missing genes for further investigation.}, } @article {pmid39827750, year = {2025}, author = {Galasong, Y and Kijpatanasilp, I and Çobo, M and Asadatorn, N and Wang, R and Assatarakul, K and Worobo, RW}, title = {Spoilage investigation of pasteurized apple juice with visual defects identifies a potentially novel Acetobacter species as the primary spoilage agent.}, journal = {International journal of food microbiology}, volume = {430}, number = {}, pages = {111056}, doi = {10.1016/j.ijfoodmicro.2025.111056}, pmid = {39827750}, issn = {1879-3460}, abstract = {Jellified materials were observed in spoiled pasteurized apple juice that contained dimethyl dicarbonate (DMDC). Microbiological analysis showed a high microbial load (4-5 log CFU/mL) in the sample. Acetobacter spp. was identified as the spoilage microorganism by 16S rRNA gene sequencing. Metataxonomic analysis showed Acetobacter represented 99 % of the bacterial community. Three Acetobacter isolates (LX5, LX9 and LX16) were selected for whole genome sequencing and characterized for their susceptibility to DMDC. Genome-based phylogeny supported the species-level classification of LX5 as A. fabarum. It also suggested LX9 and LX16 are the same microorganisms from a potentially novel species closely related to A. lovaniensis. The minimum inhibitory concentrations (MICs) of DMDC for Acetobacter isolates in sterile apple juice (pH ∼3) at 30 °C were 46 ppm and 329 ppm for A. fabarum LX5 and Acetobacter LX9/LX16, respectively. The minimum bactericidal concentrations (MBCs) were 250 and 500 ppm for A. fabarum LX5 and Acetobacter LX9/LX16, respectively. 
The inoculum concentration for the MIC assay was approximately 6 log10 CFU/mL, representing the "worst-case" scenario. When the contamination level was reduced to 500 CFU/mL per US federal regulation (21 CFR 172.133) and the apple juice was refrigerated, Acetobacter isolates did not grow and were completely inhibited by 238 ppm DMDC. Pangenome analysis identified gene clusters that potentially play a role in biofilm development, carbohydrate metabolism, and oxidative stress tolerance, but it also ruled out the involvement of Acetobacter in apple juice gel formation. The investigation concluded that post-pasteurization contamination, high microbial load and ambient storage were factors leading to this spoilage incident.}, } @article {pmid39826711, year = {2025}, author = {Uchiyama, I and Mihara, M and Nishide, H and Chiba, H and Takayanagi, M and Kawai, M and Takami, H}, title = {MBGD: Microbial genome database for comparative analysis featuring enhanced functionality to characterize gene and genome functions through large-scale orthology analysis.}, journal = {Journal of molecular biology}, volume = {}, number = {}, pages = {168957}, doi = {10.1016/j.jmb.2025.168957}, pmid = {39826711}, issn = {1089-8638}, abstract = {Microbial Genome Database for Comparative Analysis (MBGD) is a comprehensive ortholog database encompassing published complete microbial genomes. The ortholog tables in MBGD are constructed in a hierarchical manner. The top-level ortholog table is now constructed from 1,812 genus-level pan-genomes, 6,268 species-level pan-genomes, and 34,079 genomes in total. To support analyses of newly sequenced genomes, MBGD updates MyMBGD functionality, which offers two analysis modes: assignment mode and clustering mode. Assignment mode rapidly classifies genes in the query genomes into existing MBGD ortholog groups, while clustering mode performs de novo clustering of query genomes using the DomClust program. 
In assignment mode, users can evaluate the presence of genomic functions, as defined in the KEGG Module database, in each query genome using the Genomaple software and compare the results across multiple genomes. To enhance this analysis, we developed a method to subdivide MBGD ortholog groups as needed to improve cross-references to the KEGG Orthology groups. Another notable feature is the phylogenetic profile search interface, which enables users to specify a set of organisms in which orthologs are present or absent (i.e., a phylogenetic profile), and search for ortholog groups with similar phylogenetic profiles. To construct a phylogenetic profile, users can search organisms by specifying phenotype, environment, taxonomy, or a particular ortholog group. MBGD is available at https://mbgd.nibb.ac.jp/.}, } @article {pmid39826720, year = {2025}, author = {Akhoon, BA and Qiao, Q and Stewart, A and Chen, J and Rodriguez Lopez, CM and Corbin, KR}, title = {Pangenomic analysis of the bacterial cellulose-producing genera Komagataeibacter and Novacetimonas.}, journal = {International journal of biological macromolecules}, volume = {}, number = {}, pages = {139980}, doi = {10.1016/j.ijbiomac.2025.139980}, pmid = {39826720}, issn = {1879-0003}, abstract = {Bacterial cellulose (BC) holds significant commercial potential due to its unique structural and chemical properties, making it suitable for applications in electronics, medicine, and pharmaceuticals. However, large-scale BC production remains limited by challenges in bacterial performance. In this study, we compared 79 microbial genomes from three genera—Komagataeibacter, Novacetimonas, and Gluconacetobacter—to investigate their pangenomes, genetic diversity, and evolutionary relationships. Through comparative genomic and phylogenetic analyses, we identified distinct genome compositions and evolutionary patterns that differ from previous reports. 
The role of horizontal gene transfer (HGT) in shaping the genetic diversity and adaptability of these bacteria was also explored. Key determinants in BC production, such as variations in the bacterial cellulose biosynthesis (bcs) operon, carbohydrate uptake genes, and carbohydrate-active enzymes, were examined. Additionally, several biosynthetic gene clusters (BGCs), including Linocin M18 and sactipeptides, which encode for antimicrobial peptides known as bacteriocins, were identified. These findings reveal new aspects of the genetic diversity in cellulose-producing bacteria and present a comprehensive genomic toolkit that will support future efforts to optimize BC production and improve microbial performance for commercial applications.}, } @article {pmid39813779, year = {2025}, author = {He, G and Liu, C and Wang, M}, title = {Perspectives and opportunities in forensic human, animal, and plant integrative genomics in the Pangenome era.}, journal = {Forensic science international}, volume = {367}, number = {}, pages = {112370}, doi = {10.1016/j.forsciint.2025.112370}, pmid = {39813779}, issn = {1872-6283}, abstract = {The Human Pangenome Reference Consortium, the Chinese Pangenome Consortium, and other plant and animal pangenome projects have announced the completion of pilot work aimed at constructing high-quality, haplotype-resolved reference graph genomes representative of global ethno-linguistically different populations or different plant and animal species. These graph-based, gapless pangenome references, which are enriched in terms of genomic diversity, completeness, and contiguity, have the potential for enhancing long-read sequencing (LRS)-based genomic research, as well as improving mappability and variant genotyping on traditional short-read sequencing platforms. 
We comprehensively discuss the advancements in pangenome-based genomic integrative genomic discoveries across forensic-related species (humans, animals, and plants) and summarize their applications in variant identification and forensic genomics, epigenetics, transcriptomics, and microbiome research. Recent developments in multiplexed array sequencing have introduced a highly efficient and programmable technique to overcome the limitations of short forensic marker lengths in LRS platforms. This technique enables the concatenation of short RNA transcripts and DNA fragments into LRS-optimal molecules for sequencing, assembly, and genotyping. The integration of new pangenome reference coordinates and corresponding computational algorithms will benefit forensic integrative genomics by facilitating new marker identification, accurate genotyping, high-resolution panel development, and the updating of statistical algorithms. This review highlights the necessity of integrating LRS-based platforms, pangenome-based study designs, and graph-based pangenome references in short-read mapping and LRS-based innovations to achieve precision forensic science.}, } @article {pmid39812022, year = {2025}, author = {Gamblin, J and Lambert, A and Blanquart, F}, title = {Persistent, Private and Mobile genes: a model for gene dynamics in evolving pangenomes.}, journal = {Molecular biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/molbev/msaf001}, pmid = {39812022}, issn = {1537-1719}, abstract = {The pangenome of a species is the set of all genes carried by at least one member of the species. In bacteria, pangenomes can be much larger than the set of genes carried by a single organism. Many questions remain unanswered regarding the evolutionary forces shaping the patterns of presence/absence of genes in pangenomes of a given species. 
We introduce a new model for bacterial pangenome evolution along a species phylogeny that explicitly describes the timing of appearance of each gene in the species and accounts for three generic types of gene evolutionary dynamics: persistent genes that are present in the ancestral genome, private genes that are specific to a given clade, and mobile genes that are imported once into the gene pool and then undergo frequent horizontal gene transfers. We call this model the Persistent-Private-Mobile (PPM) model. We develop an algorithm fitting the PPM model and apply it to a dataset of 902 Salmonella enterica genomes. We show that the best fitting model is able to reproduce the global pattern of some multivariate statistics like the gene frequency spectrum and the parsimony vs. frequency plot. Moreover, the gene classification induced by the PPM model allows us to study the position of accessory genes on the chromosome depending on their category, as well as the gene functions that are most present in each category. This work paves the way for a mechanistic understanding of pangenome evolution, and the PPM model developed here could be used for dynamics-aware gene classification.}, } @article {pmid39810189, year = {2025}, author = {Mohite, OS and Jørgensen, TS and Booth, TJ and Charusanti, P and Phaneuf, PV and Weber, T and Palsson, BO}, title = {Pangenome mining of the Streptomyces genus redefines species' biosynthetic potential.}, journal = {Genome biology}, volume = {26}, number = {1}, pages = {9}, pmid = {39810189}, issn = {1474-760X}, mesh = {*Streptomyces/genetics/metabolism ; *Genome, Bacterial ; *Multigene Family ; Biosynthetic Pathways/genetics ; Phylogeny ; Genomics ; Synteny ; }, abstract = {BACKGROUND: Streptomyces is a highly diverse genus known for the production of secondary or specialized metabolites with a wide range of applications in the medical and agricultural industries. 
Several thousand complete or nearly complete Streptomyces genome sequences are now available, affording the opportunity to deeply investigate the biosynthetic potential within these organisms and to advance natural product discovery initiatives.

RESULTS: We perform pangenome analysis on 2371 Streptomyces genomes, including approximately 1200 complete assemblies. Employing a data-driven approach based on genome similarities, the Streptomyces genus was classified into 7 primary and 42 secondary Mash-clusters, forming the basis for comprehensive pangenome mining. A refined workflow for grouping biosynthetic gene clusters (BGCs) redefines their diversity across different Mash-clusters. This workflow also reassigns 2729 known BGC families to only 440 families, a reduction caused by inaccuracies in BGC boundary detections. When the genomic location of BGCs is included in the analysis, a conserved genomic structure, or synteny, among BGCs becomes apparent within species and Mash-clusters. This synteny suggests that vertical inheritance is a major factor in the diversification of BGCs.

CONCLUSIONS: Our analysis of a genomic dataset at a scale of thousands of genomes refines predictions of BGC diversity using Mash-clusters as a basis for pangenome analysis. The observed conservation in the order of BGCs' genomic locations shows that the BGCs are vertically inherited. The presented workflow and the in-depth analysis pave the way for large-scale pangenome investigations and enhance our understanding of the biosynthetic potential of the Streptomyces genus.}, } @article {pmid39807864, year = {2025}, author = {Nakatsu, G and Ko, D and Michaud, M and Franzosa, EA and Morgan, XC and Huttenhower, C and Garrett, WS}, title = {Virulence factor discovery identifies associations between the Fic gene family and Fap2[+] fusobacteria in colorectal cancer microbiomes.}, journal = {mBio}, volume = {}, number = {}, pages = {e0373224}, doi = {10.1128/mbio.03732-24}, pmid = {39807864}, issn = {2150-7511}, abstract = {Fusobacterium is a bacterium associated with colorectal cancer (CRC) tumorigenesis, progression, and metastasis. Fap2 is a fusobacteria-specific outer membrane galactose-binding lectin that mediates Fusobacterium adherence to and invasion of CRC tumors. Advances in omics analyses provide an opportunity to profile and identify microbial genomic features that correlate with the cancer-associated bacterial virulence factor Fap2. Here, we analyze genomes of Fusobacterium colon tumor isolates and find that a family of post-translational modification enzymes containing Fic domains is associated with Fap2 positivity in these strains. We demonstrate that Fic family genes expand with the presence of Fap2 in the fusobacterial pangenome. Through comparative genomic analysis, we find that Fap2[+] Fusobacteriota are highly enriched with Fic gene families compared to other cancer-associated and human gut microbiome bacterial taxa. 
Using a global data set of CRC shotgun metagenomes, we show that fusobacterial Fic and Fap2 genes frequently co-occur in the fecal microbiomes of individuals with late-stage CRC. We further characterize specific Fic gene families harbored by Fap2[+] Fusobacterium animalis genomes and detect recombination events and elements of horizontal gene transfer via synteny analysis of Fic gene loci. Exposure of a F. animalis strain to a colon adenocarcinoma cell line increases gene expression of fusobacterial Fic and virulence-associated adhesins. Finally, we demonstrate that Fic proteins are synthesized by F. animalis as Fic peptides are detectable in F. animalis monoculture supernatants. Taken together, our study uncovers Fic genes as potential virulence factors in Fap2[+] fusobacterial genomes.

IMPORTANCE: Accumulating data support that bacterial members of the intra-tumoral microbiota critically influence colorectal cancer progression. Yet, relatively little is known about non-adhesin fusobacterial virulence factors that may influence carcinogenesis. Our genomic analysis and expression assays in fusobacteria identify Fic domain-containing genes, well-studied virulence factors in pathogenic bacteria, as potential fusobacterial virulence features. 
The Fic family proteins that we find are encoded by fusobacteria and expressed by Fusobacterium animalis merit future investigation to assess their roles in colorectal cancer development and progression.}, } @article {pmid39805704, year = {2025}, author = {Miao, Z and Yue, JX}, title = {Interactive visualization and interpretation of pangenome graphs by linear-reference-based coordinate projection and annotation integration.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.279461.124}, pmid = {39805704}, issn = {1549-5469}, abstract = {With the increasing availability of high-quality genome assemblies, pangenome graphs emerged as a new paradigm in the genomics field for identifying, encoding, and presenting genomic variation at both population and species levels. However, it remains challenging to truly dissect and interpret pangenome graphs via biologically informative visualization. To facilitate better exploration and understanding of pangenome graphs towards novel biological insights, here we present a web-based interactive Visualization and interpretation framework for linear-Reference-projected Pangenome Graphs (VRPG). VRPG provides efficient and intuitive supports for exploring and annotating pangenome graphs along a linear-genome-based coordinate system (e.g., that of a primary linear reference genome). Moreover, VRPG offers many unique features such as in-graph path highlighting for graph-constituent input assemblies, copy number characterization for graph-embedding nodes, graph-based mapping for query sequences, all of which are highly valuable for researchers working with pangenome graphs. Additionally, VRPG enables side-by-side visualization between the graph-based pangenome representation and the conventional primary-linear-reference-genome-based feature annotations, therefore seamlessly bridging the graph and linear genomic contexts. 
To further demonstrate its functionality and scalability, we applied VRPG to the cutting-edge yeast and human reference pangenome graphs derived from hundreds of high-quality genome assemblies via a dedicated web portal and examined their local genome diversity in the graph contexts.}, } @article {pmid39803467, year = {2025}, author = {Shivakumar, VS and Langmead, B}, title = {Mumemto: efficient maximal matching across pangenomes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2025.01.05.631388}, pmid = {39803467}, issn = {2692-8205}, abstract = {Aligning genomes into common coordinates is central to pangenome analysis and construction, but it is also computationally expensive. Multi-sequence maximal unique matches (multi-MUMs) are guideposts for core genome alignments, helping to frame and solve the multiple alignment problem. We introduce Mumemto, a tool that computes multi-MUMs and other match types across large pangenomes. Mumemto allows for visualization of synteny, reveals aberrant assemblies and scaffolds, and highlights pangenome conservation and structural variation. Mumemto computes multi-MUMs across 320 human genome assemblies (960GB) in 25.7 hours with under 800 GB of memory, and over hundreds of fungal genome assemblies in minutes. Mumemto is implemented in C++ and Python and available open-source at https://github.com/vikshiv/mumemto .}, } @article {pmid39802740, year = {2025}, author = {Liu, Z and Bernard, A and Wang, Y and Dirlewanger, E and Wang, X}, title = {Genomes and integrative genomic insights into the genetic architecture of main agronomic traits in the edible cherries.}, journal = {Horticulture research}, volume = {12}, number = {1}, pages = {uhae269}, pmid = {39802740}, issn = {2662-6810}, abstract = {Cherries are one of the economically important fruit crops in the Rosaceae family, Prunus genus. 
As the first fruits of the spring season in the northern hemisphere, their attractive appearance, intensely desirable tastes, high nutrients content, and consumer-friendly size captivate consumers worldwide. In the past 30 years, although cherry geneticists and breeders have greatly progressed in understanding the genetic and molecular basis underlying fruit quality, adaptation to climate change, and biotic and abiotic stress resistance, the utilization of cherry genomic data in genetics and molecular breeding has remained limited to date. Here, we thoroughly investigated recent discoveries in constructing genetic linkage maps, identifying quantitative trait loci (QTLs), genome-wide association studies (GWAS), and validating functional genes of edible cherries based on available de novo genomes and genome resequencing data of edible cherries. We further comprehensively demonstrated the genetic architecture of the main agronomic traits of edible cherries by methodically integrating QTLs, GWAS loci, and functional genes into the identical reference genome with improved annotations. 
These collective endeavors will offer new perspectives on the availability of sequence data and the construction of an interspecific pangenome of edible cherries, ultimately guiding cherry breeding strategies and genetic improvement programs, and facilitating the exploration of similar traits and breeding innovations across Prunus species.}, } @article {pmid39802648, year = {2025}, author = {Kan, NP and Yin, Z and Qiu, YF and Zheng, E and Chen, J and Huang, J and Du, Y}, title = {A pan-genome perspective on the evolutionary dynamics of polyphyly, virulence, and antibiotic resistance in Salmonella enterica serovar Mbandaka highlights emerging threats to public health and food safety posed by cloud gene families.}, journal = {Current research in food science}, volume = {10}, number = {}, pages = {100957}, pmid = {39802648}, issn = {2665-9271}, abstract = {Salmonella enterica serovar Mbandaka, a prevalent foodborne pathogen, poses a threat to public health but remains poorly understood. We have determined the phylogenomic tree, genetic diversity, virulence, and antimicrobial resistance (AMR) profiles on a large genomic scale to elucidate the evolutionary dynamics within the Mbandaka pan-genome. The polyphyletic nature of this serovar is characterized by two distinct phylogenetic groups and inter-serovar recombination boundaries, potentially arising from recombination events at the H2-antigen loci. The open pan-genome exhibited a flexible gene repertoire, with numerous cloud gene families involved in virulence and AMR. Extensive gene gain and loss observed at the terminal nodes of the phylogenetic tree indicate that Mbandaka individuals have undergone frequent gene turnover. The resulting changes in virulence and AMR genes potentially pose emerging threats to public health. 
We explored serovar conversion due to recombination of H-antigen loci, inter-serovar divergences in gene gain and loss, prophage-mediated acquisition of virulence factors, and the role of incompatibility group plasmids in acquiring resistance determinants as key molecular mechanisms driving the pathogenicity and antibiotic resistance of Mbandaka. Our work contributes to a comprehensive understanding of the complex mechanisms of pathogenesis and the ongoing evolutionary arms race with current therapeutic approaches in serovar Mbandaka.}, } @article {pmid39799915, year = {2025}, author = {Yang, J and Zhang, S and Geng, L and Zhao, D and Xing, S and Ji, X and Yan, L}, title = {Comparative genomics analysis of the reason for [12]C[6+] heavy-ion irradiation in improving Fe3O4 nanoparticle yield of Acidithiobacillus ferrooxidans.}, journal = {Ecotoxicology and environmental safety}, volume = {289}, number = {}, pages = {117668}, doi = {10.1016/j.ecoenv.2025.117668}, pmid = {39799915}, issn = {1090-2414}, abstract = {The Fe3O4 nanoparticles synthesized by Acidithiobacillus ferrooxidans have broad practical value, while the low yield limits their commercial application. Herein, we employed a [12]C[6+] heavy-ion beam to induce mutagenesis of A. ferrooxidans BYM and successfully screened a mutant BYMT-200 with a 1.36 mg/L Fe3O4 nanoparticle yield, which could stably inherit over many generations based on assessing cell magnetism and Fe3O4 nanoparticle synthesis. Comparative genome analysis detected 14 mutation sites, causing six synonymous mutations, one missense mutation, and one nonsense mutation. We further annotated the genes involved in the mutation, such as hcp, hsdM, yghU, K7B00_11365, and K7B00_11355, which are responsible for the substantial changes in the Fe3O4 nanoparticle yield of A. ferrooxidans. Additionally, we performed a pan-genome analysis to understand how these genes regulate Fe3O4 nanoparticle synthesis. 
The core genome of 2376 orthologous clusters was identified and visualized by progressive Mauve alignment and OrthoVenn. A total of 109 regulatory genes related to iron metabolism were identified, mainly involved in electron transport, iron acquisition, iron storage, and oxidative stress. The mutant genes are closely related to iron-sulfur clusters and oxidative stress. Accordingly, we proposed a hypothetical mechanism for increasing Fe3O4 nanoparticle production in A. ferrooxidans BYMT-200 to withstand high oxidative stress caused by heavy ion radiation. Our study offers significant theoretical guidance for further acquiring the high-yield Fe3O4 nanoparticle-producing bacteria and studying the mechanism of its synthesis.}, } @article {pmid39799174, year = {2025}, author = {Shin, HD and Park, W and Chai, HH and Lee, Y and Jung, J and Ko, BJ and Kim, H}, title = {Chromosome-level Genome Assembly of Korean Long-tailed Chicken and Pangenome of 40 Gallus gallus Assemblies.}, journal = {Scientific data}, volume = {12}, number = {1}, pages = {51}, pmid = {39799174}, issn = {2052-4463}, support = {PJ013341//Rural Development Administration (RDA)/ ; }, mesh = {Animals ; *Chickens/genetics ; *Genome ; *Chromosomes ; Republic of Korea ; }, abstract = {This study presents the first chromosome-level genome assembly of the Korean long-tailed chicken (KLC), a unique breed of Gallus gallus known as Ginkkoridak. Our assembly achieved a super contig N50 of 5.7 Mbp and a scaffold N50 exceeding 90 Mb, with a genome completeness of 96.3% as assessed by BUSCO using the aves_odb10 set. We also constructed a comprehensive pangenome graph, incorporating 40 Gallus gallus assemblies, including the KLC genome. This graph comprises 87,934,214 nodes, 121,720,974 edges, and a total sequence length of 1,709,850,352 bp. Notably, our KLC assembly contributed 1,919,925 bp of new sequences to the pangenome, underscoring the unique genetic makeup of this breed. 
Furthermore, in comparison with the pangenome, we identified 36,818 structural variants in KLC, which included 2,529 insertions, 27,743 deletions, and 6,546 of either insertions or deletions shorter than 1 kb. We also successfully identified pan-genome wide non-reference sequences. Our KLC assembly and pangenome graph provide valuable genomic resources for studying G. gallus populations.}, } @article {pmid39797734, year = {2025}, author = {Ryu, H and Han, H and Kim, C and Kim, J}, title = {GDBr: genomic signature interpretation tool for DNA double-strand break repair mechanisms.}, journal = {Nucleic acids research}, volume = {53}, number = {2}, pages = {}, pmid = {39797734}, issn = {1362-4962}, support = {RS-2023-00247499//National Research Foundation of Korea/ ; CRC22013-300//National Research Council of Science and Technology/ ; HI22C132200//Korea Health Industry Development Institute/Republic of Korea ; //Korea Research Institute of Bioscience and Biotechnology/ ; CRC22013-300//NST grant funded by the Korea government (MSIT)/ ; }, mesh = {*DNA Breaks, Double-Stranded ; Humans ; *Software ; *Genomics/methods ; *DNA Repair/genetics ; Genome, Human ; DNA End-Joining Repair/genetics ; Homologous Recombination ; }, abstract = {Large genetic variants can be generated via homologous recombination (HR), such as polymerase theta-mediated end joining (TMEJ) or single-strand annealing (SSA). Given that these HR-based mechanisms leave specific genomic signatures, we developed GDBr, a genomic signature interpretation tool for DNA double-strand break repair mechanisms using high-quality genome assemblies. We applied GDBr to a draft human pangenome reference. We found that 78.1% of non-repetitive insertions and deletions and 11.0% of non-repetitive complex substitutions contained specific signatures. Of these, we interpreted that 98.7% and 1.3% of the insertions and deletions were generated via TMEJ and SSA, respectively, and all complex substitutions via TMEJ. 
Since population-level pangenome datasets are being dramatically accumulated, GDBr can provide mechanistic insights into how variants are formed. GDBr is available on GitHub at https://github.com/Chemical118/GDBr.}, } @article {pmid39795357, year = {2025}, author = {Jung, H and Han, G and Lee, D and Jung, HK and Kim, YS and Kong, HJ and Kim, YO and Seo, YS and Park, J}, title = {Understanding the Impact of Salt Stress on Plant Pathogens Through Phenotypic and Transcriptomic Analysis.}, journal = {Plants (Basel, Switzerland)}, volume = {14}, number = {1}, pages = {}, doi = {10.3390/plants14010097}, pmid = {39795357}, issn = {2223-7747}, support = {R2024019//National Institute of Fisheries Science/ ; }, abstract = {For plant diseases to become established, plant pathogens require not only virulence factors and susceptible hosts, but also optimal environmental conditions. The accumulation of high soil salinity can have serious impacts on agro-biological ecosystems. However, the interactions between plant pathogens and salinity have not been fully characterized. This study investigated the effects of salt stress on representative plant pathogens, such as Burkholderia gladioli, Burkholderia glumae, Pectobacterium carotovorum subsp. carotovorum (Pcc), Ralstonia solanacearum, and Xanthomonas oryzae pv. oryzae. Phenotypic assays revealed that B. gladioli and R. solanacearum are highly sensitive to salt stress, exhibiting significant reductions in growth, motility, and enzyme production, whereas Pcc showed notable tolerance. Pan-genome-based comparative transcriptomics identified co-downregulated patterns in B. gladioli and R. solanacearum under stress conditions, indicating the suppression of bacterial chemotaxis and type III secretion systems. Uniquely upregulated patterns in Pcc were associated with enhanced survival under high salinity, such as protein quality control, osmotic equilibrium, and iron acquisition. 
Additionally, the application of salt stress combined with the beneficial bacterium Chryseobacterium salivictor significantly reduced tomato wilt caused by R. solanacearum, suggesting a potential management strategy. This study underscores practical implications for effectively understanding and controlling plant pathogens under future climate changes involving salt stress.}, } @article {pmid39794865, year = {2025}, author = {Passarelli-Araujo, H and Venancio, TM and Hanage, WP}, title = {Relating ecological diversity to genetic discontinuity across bacterial species.}, journal = {Genome biology}, volume = {26}, number = {1}, pages = {8}, pmid = {39794865}, issn = {1474-760X}, mesh = {*Genome, Bacterial ; *Bacteria/genetics/classification ; Genetic Variation ; Phylogeny ; Mycobacterium tuberculosis/genetics ; Machine Learning ; Evolution, Molecular ; Biodiversity ; }, abstract = {BACKGROUND: Genetic discontinuity represents abrupt breaks in genomic identity among species. Advances in genome sequencing have enhanced our ability to track and characterize genetic discontinuity in bacterial populations. However, exploring the degree to which bacterial diversity exists as a continuum or sorted into discrete and readily defined species remains a challenge in microbial ecology. Here, we aim to quantify the genetic discontinuity (δ) and investigate how this metric is related to ecology.

RESULTS: We harness a dataset comprising 210,129 genomes to systematically explore genetic discontinuity patterns across several distantly related species, finding clear breakpoints which vary depending on the taxa in question. By delving into pangenome characteristics, we uncover a significant association between pangenome saturation and genetic discontinuity. Closed pangenomes are associated with more pronounced breaks, exemplified by Mycobacterium tuberculosis. Additionally, through a machine learning approach, we detect key features such as gene conservation patterns and functional annotations that significantly impact genetic discontinuity prediction.

CONCLUSIONS: Our study clarifies bacterial genetic patterns and their ecological impacts, enhancing the delineation of species boundaries and deepening our understanding of microbial diversity.}, } @article {pmid39791953, year = {2025}, author = {Huang, Y and Sahu, SK and Liu, X}, title = {Deciphering recent transposition patterns in plants through comparison of 811 genome assemblies.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14570}, pmid = {39791953}, issn = {1467-7652}, abstract = {Transposable elements (TEs) are significant drivers of genome evolution, yet their recent dynamics and impacts within and among species, as well as the roles of host genes and non-coding RNAs in the transposition process, remain elusive. With advancements in large-scale pan-genome sequencing and the development of open data sharing, large-scale comparative genomics studies have become feasible. Here, we performed complete de novo TE annotations and identified active TEs in 310 plant genome assemblies across 119 species and seven crop populations. Using 811 high-quality genomes, we detected 13 844 553 TE-induced structural variants (TE-SVs), providing unprecedented resolution in delineating recent TE activities. Our integrative analysis revealed a mutual evolutionary relationship between TEs and host genomes. On one hand, host genes and ncRNAs are involved in the transposition process, as evidenced by their colocalization and coactivation with TEs, and may play a role in chromatin regulation. On the other hand, TEs drive genetic innovation by promoting the duplication of host genes and inserting into regulatory regions. Moreover, genes influenced by active TEs are linked to plant growth, nutrient absorption, storage metabolism and environmental adaptation, aiding in crop domestication and adaptation. 
This TE dynamics atlas not only reveals evolutionary and functional features linked to transposition activity but also highlights the role of TEs in crop domestication and adaptation, paving the way for future exploration of TE-mediated genome evolution and crop improvement strategies.}, } @article {pmid39789611, year = {2025}, author = {Chen, W and Xie, Q and Fu, J and Li, S and Shi, Y and Lu, J and Zhang, Y and Zhao, Y and Ma, R and Li, B and Zhang, B and Grierson, D and Yu, M and Fei, Z and Chen, K}, title = {Graph pangenome reveals the regulation of malate content in blood-fleshed peach by NAC transcription factors.}, journal = {Genome biology}, volume = {26}, number = {1}, pages = {7}, pmid = {39789611}, issn = {1474-760X}, mesh = {*Malates/metabolism ; *Prunus persica/genetics/metabolism ; *Transcription Factors/metabolism/genetics ; *Fruit/genetics/metabolism ; *Plant Proteins/genetics/metabolism ; *Genome, Plant ; Gene Expression Regulation, Plant ; Genome-Wide Association Study ; Promoter Regions, Genetic ; }, abstract = {BACKGROUND: Fruit acidity and color are important quality attributes in peaches. Although there are some exceptions, blood-fleshed peaches typically have a sour taste. However, little is known about the genetic variations linking organic acid and color regulation in peaches.

RESULTS: Here, we report a peach graph-based pangenome constructed from sixteen individual genome assemblies, capturing abundant structural variations and 82.3 Mb of sequences absent in the reference genome. Pangenome analysis reveals a long terminal repeat retrotransposon insertion in the promoter of the NAC transcription factor (TF) PpBL in blood-fleshed peaches, which enhances PpBL expression. Genome-wide association study identifies a significant association between PpBL and malate content. Silencing PpBL in peach fruit and ectopic overexpression of PpBL in tomatoes confirm that PpBL is a positive regulator of malate accumulation. Furthermore, we demonstrate that PpBL works synergistically with another NAC TF, PpNAC1, to activate the transcription of the aluminum-activated malate transporter PpALMT4, leading to increased malate content.

CONCLUSIONS: These findings, along with previous research showing that PpBL and PpNAC1 also regulate anthocyanin accumulation, explain the red coloration and sour taste in blood-fleshed peach fruits.}, } @article {pmid39789465, year = {2025}, author = {Yacoub, E and Baby, V and Sirand-Pugnet, P and Arfi, Y and Mardassi, H and Blanchard, A and Chibani, S and Ben Abdelmoumen Mardassi, B}, title = {A sweeping view of avian mycoplasmas biology drawn from comparative genomic analyses.}, journal = {BMC genomics}, volume = {26}, number = {1}, pages = {24}, pmid = {39789465}, issn = {1471-2164}, abstract = {BACKGROUND: Avian mycoplasmas are small bacteria associated with several pathogenic conditions in many wild and poultry bird species. Extensive genomic data are available for many avian mycoplasmas, yet no comparative studies focusing on this group of mycoplasmas have been undertaken so far.

RESULTS: Here, based on the comparison of forty avian mycoplasma genomes belonging to ten different species, we provide insightful information on the phylogeny, pan/core genome, energetic metabolism, and virulence of these avian pathogens. Analyses disclosed considerable inter- and intra-species genomic variabilities, with genome sizes that can vary by twice as much. Phylogenetic analysis based on concatenated orthologous genes revealed that avian mycoplasmas fell into either Hominis or Pneumoniae groups within the Mollicutes and could split into various clusters. No host co-evolution of avian mycoplasmas can be inferred from the proposed phylogenetic scheme. With 3,237 different gene clusters, the avian mycoplasma group under study proved diverse enough to have an open pan genome. However, a set of 150 gene clusters was found to be shared between all avian mycoplasmas, which is likely encoding essential functions. Comparison of energy metabolism pathways showed that avian mycoplasmas rely on various sources of energy. Superposition between phylogenetic and energy metabolism groups revealed that the glycolytic mycoplasmas belong to two distinct phylogenetic groups (Hominis and Pneumoniae), while all the arginine-utilizing mycoplasmas belong only to Hominis group. This can stand for different evolutionary strategies followed by avian mycoplasmas and further emphasizes the diversity within this group. Virulence determinants survey showed that the involved gene arsenals vary significantly within and between species, and could even be found in species often reported apathogenic. Immunoglobulin-blocking proteins were detected in almost all avian mycoplasmas. Although these systems are not exclusive to this group, they seem to present some particular features making them unique among mycoplasmas.

CONCLUSION: This comparative genomic study uncovered the significant variable nature of avian mycoplasmas, furthering our knowledge on their biological attributes and evoking new hallmarks.}, } @article {pmid39780644, year = {2025}, author = {Yasuoka, K and Gotoh, Y and Taniguchi, I and Nagano, DS and Nakamura, K and Mizuno, Y and Abe, T and Ogura, Y and Nakajima, H and Uesugi, M and Miura, M and Seto, K and Wakabayashi, Y and Isobe, J and Watari, T and Senda, S and Hayakawa, N and Ogawa, E and Sato, T and Nanishi, E and Sakai, Y and Kato, A and Miyata, I and Ouchi, K and Ohga, S and Hara, T and Hayashi, T}, title = {Genome Analysis of Japanese Yersinia pseudotuberculosis Strains Isolated From Kawasaki Disease Patients and Other Sources and Their Phylogenetic Positions in the Global Y. pseudotuberculosis Population.}, journal = {Microbiology and immunology}, volume = {}, number = {}, pages = {}, doi = {10.1111/1348-0421.13199}, pmid = {39780644}, issn = {1348-0421}, support = {//This work was supported by a grant for Kawasaki Disease Research from the Japan Blood Products Organization and JSPS KAKENHI (Grant Number 23k15366)./ ; }, abstract = {Yersinia pseudotuberculosis (Ypt) is a gram-negative bacterium that infects both humans and animals primarily through fecal‒oral transmission. While Ypt causes acute gastroenteritis in humans, an association with Kawasaki disease (KD), a disease that primarily affects infants and young children and causes multisystemic vasculitis, has also been suspected. Although KD represents a significant health concern worldwide, the highest annual incidence rate is reported in Japan. Previously, a geographical origin-dependent population structure of Ypt comprising the Asian, transitional, and European clades was proposed. However, genomic data on KD-associated Ypt strains is currently unavailable. 
In this study, to analyze the phylogenetic and genomic features of KD-associated strains, we determined the whole-genome sequences of 35 Japanese Ypt strains, including 11 KD-associated strains, and constructed a genome set (n = 204) representing the global population of Ypt by adding publicly available Ypt genomes. In a phylogenetic analysis, all sequenced Japanese strains, including the KD-associated strains, belonged to the Asian clade, which appeared to be the ancestral clade of Ypt, and the KD-associated strains belonged to multiple lineages in this clade. Strains from patients with Far East scarlet-like fever (FESLF), a KD-related disease, also belonged to the Asian clade. Moreover, no KD strain-specific genes were identified in pan-genome-wide association study analyses. Notably, however, the gene encoding a superantigen called Yersinia pseudotuberculosis-derived mitogen (YPM) showed a distribution pattern highly biased to the Asian clade. Although further studies are needed, our results suggest that Asian clade strains may have a greater potential to trigger KD.}, } @article {pmid39779953, year = {2025}, author = {Secomandi, S and Gallo, GR and Rossi, R and Rodríguez Fernandes, C and Jarvis, ED and Bonisoli-Alquati, A and Gianfranceschi, L and Formenti, G}, title = {Pangenome graphs and their applications in biodiversity genomics.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {39779953}, issn = {1546-1718}, abstract = {Complete datasets of genetic variants are key to biodiversity genomic studies. Long-read sequencing technologies allow the routine assembly of highly contiguous, haplotype-resolved reference genomes. However, even when complete, reference genomes from a single individual may bias downstream analyses and fail to adequately represent genetic diversity within a population or species. 
Pangenome graphs assembled from aligned collections of high-quality genomes can overcome representation bias by integrating sequence information from multiple genomes from the same population, species or genus into a single reference. Here, we review the available tools and data structures to build, visualize and manipulate pangenome graphs while providing practical examples and discussing their applications in biodiversity and conservation genomics across the tree of life.}, } @article {pmid39777507, year = {2025}, author = {Shelton, AN and Yu, FB and Grossman, AR and Bhaya, D}, title = {Abundant and active community members respond to diel cycles in hot spring phototrophic mats.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1093/ismejo/wraf001}, pmid = {39777507}, issn = {1751-7370}, abstract = {Photosynthetic microbial mats in hot springs can provide insights into the diel behaviors of communities in extreme environments. In this habitat, photosynthesis dominates during the day, leading to super-oxic conditions, with a rapid transition to fermentation and anoxia at night. Multiple samples were collected from two springs over several years to generate metagenomic and metatranscriptomic datasets. Metagenome assembled genomes comprised 71 taxa (in 19 different phyla), of which twelve core taxa were present at high abundance in both springs. The eight most active taxa identified by metatranscriptomics were an oxygenic cyanobacterium (Synechococcus sp.), five anoxygenic phototrophs from three different phyla, and two understudied heterotrophs from phylum Armatimonadota. In all eight taxa, a significant fraction of genes exhibited a diel expression pattern although peak timing varied considerably. The two abundant heterotrophs exhibit starkly different peak timing of expression, which we propose is shaped by their metabolic and genomic potential to use carbon sources that become differentially available during the diel cycle. 
Network analysis revealed pathway expression patterns that had not previously been linked to diel cycles, including ribosome biogenesis and chaperones. This provides a framework for analyzing metabolically coupled communities and the dominant role of the diel cycle.}, } @article {pmid39772210, year = {2024}, author = {Jdeed, G and Morozova, VV and Tikunova, NV}, title = {Genome Analysis of Anti-Phage Defense Systems and Defense Islands in Stenotrophomonas maltophilia: Preservation and Variability.}, journal = {Viruses}, volume = {16}, number = {12}, pages = {}, doi = {10.3390/v16121903}, pmid = {39772210}, issn = {1999-4915}, support = {075-15-2021-1085//the Ministry of Science and Higher Education of the Russian Federation/ ; }, mesh = {*Stenotrophomonas maltophilia/genetics/virology ; *Bacteriophages/genetics/physiology ; *Phylogeny ; *Genome, Bacterial ; Genomic Islands ; }, abstract = {Anti-phage defense systems are widespread in bacteria due to the latter's continuous adaptation to infection by bacteriophages (phages). Stenotrophomonas maltophilia has a high degree of intrinsic antibiotic resistance, which makes phage therapy relevant for the treatment of infections caused by this species. Studying the array of anti-phage defense systems that could be found in S. maltophilia helps in better adapting the phages to the systems present in the pathogenic bacteria. Pangenome analysis of the available S. maltophilia strains with complete genomes that were downloaded from GenBank, including five local genomes, indicated a wide set of 72 defense systems and subsystems that varied between the strains. Seven of these systems were present in more than 20% of the studied genomes and the proteins encoded by the systems were variable in most of the cases. A total of 27 defense islands were revealed where defense systems were found; however, more than 60% of the instances of systems were found in four defense islands. 
Several elements linked to the transfer of these systems were found. No obvious associations between the pattern of distribution of the anti-phage defense systems of S. maltophilia and the phylogenetic features or the isolation site were found.}, } @article {pmid39770839, year = {2024}, author = {Gonzalez-Silva, A and San Juan-Mendo, M and Delgado-Prudencio, G and Hernández-García, JA and Larios-Serrato, V and Aguilar, C and Villa-Tanaca, L and Hernández-Rodríguez, C}, title = {Comparative Genomics and Biosynthetic Cluster Analysis of Antifungal Secondary Metabolites of Three Strains of Streptomyces albidoflavus Isolated from Rhizospheric Soils.}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122637}, pmid = {39770839}, issn = {2076-2607}, support = {CB 283225//Consejo Nacional de Humanidades, Ciencia y Tecnología/ ; SIP 20220742, 20220795, 20231480, 20231481, 20240945, 20240946//Instituto Politécnico Nacional/ ; }, abstract = {Streptomyces is a genus of Gram-positive bacteria with high GC content. It remains attractive for studying and discovering new antibiotics, antifungals, and chemotherapeutics. Streptomyces genomes can contain more than 30 cryptic and expressed biosynthetic gene clusters (BGC) encoding secondary metabolites. In this study, three Streptomyces strains isolated from jungle rhizospheric soil exhibited supernatants that can inhibit sensitive and fluconazole-resistant Candida spp. The genomes of the strains Streptomyces sp. A1, J25, J29 ori2 were sequenced, assembled de novo, and analyzed. The genome assemblies revealed that the size of the genomes was 6.9 Mb, with linear topology and 73.5% GC. A phylogenomic approach identified the strains with high similitudes between 98.5 and 98.7% with Streptomyces albidoflavus SM254 and R-53649 strains, respectively. Pangenomic analysis of eight genomes of S. 
albidoflavus strains deposited in the Genomes database recognized 4707 core protein orthogroups and 745 abundant accessory and exclusive protein orthogroups, suggesting an open pangenome in this species. The antiSMASH software detected candicidin and surugamide BGC-encoding polyene and octapeptide antifungal secondary metabolites in other S. albidoflavus. CORASON software was used to compare the synteny, and the abundance of genes harbored in the clusters was used. In conclusion, although the three strains belong to the same species, each possesses a distinct genome, as evidenced by the different phenotypes, including antifungal and extracellular enzymatic activities.}, } @article {pmid39770825, year = {2024}, author = {Heidarpanah, S and Li, K and Thibodeau, A and Meniaï, I and Parreira, VR and Quessy, S and Segura, M and Fittipaldi, N and Gaucher, ML}, title = {Genomic Diversity and Virulence Factors of Clostridium perfringens Isolated from Healthy and Necrotic Enteritis-Affected Broiler Chicken Farms in Quebec Province.}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122624}, pmid = {39770825}, issn = {2076-2607}, support = {IT09545//Mitacs/ ; }, abstract = {Avian necrotic enteritis due to the Gram-positive bacterium Clostridium perfringens has re-emerged following the ban on antibiotic growth promoters in many poultry producing countries. The limited number of previous studies has left important gaps in our understanding of the genetic diversity and virulence traits of the pathogen. To address these knowledge gaps, in this study, we sequenced the genomes of 41 Clostridium perfringens isolates recovered from commercial broiler chicken flocks in Quebec, Canada, including isolates from healthy birds and those affected by necrotic enteritis. We sought to understand the pangenome diversity and interrogated the genomes for key virulence factors involved in necrotic enteritis pathogenesis. 
On average, the genomes had a GC content of 28% and contained 3206 coding sequences. A variable presence of toxins, degradative hydrolytic enzymes, and collagen-binding proteins was also found. Through pangenome analysis, we revealed a total of 10,223 genes, 652 (6.4%) of which formed the core genome. Additionally, we identified 17 different plasmids, 12 antibiotic resistance genes, and nine prophage regions. Overall, our results demonstrated a relatively high genetic diversity among chicken Clostridium perfringens isolates collected from the same geographical location, offering new insights into potential virulence mechanisms and adaptation of the pathogen within poultry populations.}, } @article {pmid39770754, year = {2024}, author = {Bidzhieva, SK and Tourova, TP and Grouzdev, DS and Samigullina, SR and Sokolova, DS and Poltaraus, AB and Avtukh, AN and Tereshina, VM and Mardanov, AV and Zhaparov, NS and Nazina, TN}, title = {Sulfate-Reducing Bacteria Isolated from an Oil Field in Kazakhstan and a Description of Pseudodesulfovibrio karagichevae sp. nov.}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122552}, pmid = {39770754}, issn = {2076-2607}, support = {21-64-00019//Russian Science Foundation/ ; }, abstract = {Sulfidogenic bacteria cause numerous issues in the oil industry since they produce sulfide, corroding steel equipment, reducing oil quality, and worsening the environmental conditions in oil fields. The purpose of this work was to isolate and taxonomically identify the sulfidogenic bacteria responsible for the corrosion of steel equipment at the Karazhanbas oil field (Kazakhstan). In this study, we characterized five sulfidogenic strains of the genera Pseudodesulfovibrio, Oleidesulfovibrio, and Acetobacterium isolated from the formation water of the Karazhanbas oil field (Kazakhstan). 
Sulfate-reducing strain 9FUS[T] revealed 98.9% similarity of the 16S rRNA gene sequence with the closely related strain 'Pseudodesulfovibrio methanolicus' 5S69[T] and was studied in detail to enhance the taxonomic resolution. Strain 9FUS[T] grew optimally at 23-28 °C, pH 6.5, and 0-2% (w/v) NaCl. The strain used lactate, pyruvate, methanol, ethanol, fructose, ribose, and H2/CO2 (in the presence of acetate) as carbon and energy sources for sulfate reduction. Iso-C17:1 ω11, C15:0, iso-C15:0, and C16:0 were the predominant fatty acids. The genome is 4.20 Mbp with a G + C content of 64.0%. The average nucleotide identity and digital DNA-DNA hybridization values with Pseudodesulfovibrio spp. genomes were 72.5-91.6% (<95%) and 18.5-45.0% (<70%), respectively, and supported our conclusion that 9FUS[T] (=VKM B-3654[T] = KCTC 25498[T]) belonged to a novel Pseudodesulfovibrio species, for which the name Pseudodesulfovibrio karagichevae sp. nov. is proposed. Pangenome analysis of sixteen Pseudodesulfovibrio species and functional annotation analysis of identified genes revealed complete modules of enzymes of the main metabolic pathways, characteristic of bacteria of this genus, and unique genes highlighting the adaptations of strain 9FUS[T] in carbohydrate metabolism, nutrient uptake, and environmental stress response. 
Isolation of these strains expands our understanding of the diversity of sulfidogens in oil reservoirs and can be used to test the effectiveness of biocides used in an oil field.}, } @article {pmid39770704, year = {2024}, author = {You, Y and Xu, X and Liu, H and Zhang, L}, title = {Locust Pathogen Aspergillus oryzae XJ1 Is Different from Aspergillus oryzae and Aspergillus flavus Based on Genomics Comparisons.}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122501}, pmid = {39770704}, issn = {2076-2607}, support = {CXGC2024F05, CXGC2024D05//Agricultural scientific and technological innovation project of Shandong Academy of Ag-ricultural Sciences/ ; ZR2022MC117//Shandong Provincial Natural Science Foundation/ ; }, abstract = {Fungi play an increasingly important role in the biological control of insect pests. Aspergillus oryzae XJ1 is highly virulent to locust adults and nymphs, which are a destructive economic pest worldwide. Because of its host association with locusts, which is unique in Aspergillus, in this study, we examined the genetic relationships of A. oryzae XJ1 within Aspergillus. We sequenced the genome of A. oryzae XJ1 and compared it with the genomes of other Aspergillus species. The complete genome of A. oryzae XJ1 is 37.9 Mb, comprising 11,720 putative genes, assembled into eight chromosomes. The genome size is similar to that of other A. oryzae strains. Phylogenomic analysis indicated that A. oryzae XJ1 was most closely related to A. flavus NRRL3357, not A. oryzae RIB40. Core/pan-genome analysis of A. oryzae XJ1 and other Aspergillus species revealed that A. oryzae XJ1 had 704 strain-specific genes, whereas A. flavus NRRL3357, A. oryzae KDG 21, and A. parasiticus NRRL 2999 had 646, 955, and 779 unique genes, respectively. The A. oryzae XJ1 genome showed structural differences compared with the genomes of A. oryzae RIB40 and A. flavus NRRL3357 in genomic synteny analysis. 
These results indicate that A. oryzae XJ1 is genetically distinct at the genome level from other Aspergillus species, including A. oryzae and A. flavus, and may be regarded as a distinct species. This will provide new insight into the classification of Aspergillus based on genomics.}, } @article {pmid39770688, year = {2024}, author = {Reynoso, EC and Delgado-Suárez, EJ and Hernández-Pérez, CF and Chavarin-Pineda, Y and Godoy-Lozano, EE and Fierros-Zárate, G and Aguilar-Vera, OA and Castillo-Ramírez, S and Gómez-Pedroso, LDCS and Sánchez-Zamorano, LM}, title = {Geography, Antimicrobial Resistance, and Genomics of Salmonella enterica (Serotypes Newport and Anatum) from Meat in Mexico (2021-2023).}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122485}, pmid = {39770688}, issn = {2076-2607}, support = {CF-2020-87198//Consejo Nacional de Humanidades, Ciencias y Tecnologías/ ; I1200/311/2023//Consejo Nacional de Humanidades, Ciencias y Tecnologías/ ; }, abstract = {Salmonella enterica non-typhoidal is a major contributor to diarrheal diseases, with over 2600 serovars identified across diverse environments. In Mexico, serovars Newport and Anatum have shown a marked increase, especially in foodborne disease, posing a public health problem. We conducted a cross-sectional study from 2021 to 2023 using active epidemiological surveillance to assess contamination in ground beef and pork at butcher shops nationwide. It involved isolation, phenotypic antimicrobial resistance, comparative genomics, spatial distribution, antimicrobial-resistance genes, and pangenome analysis. A total of 402 non-typhoidal S. enterica strains were isolated, including 59 Newport and 50 Anatum. After curating for redundancy, 45 Newport and 32 Anatum strains remained. We found that 75% of Newport strains exhibited multidrug resistance (MDR), compared to 25% of Anatum strains. 
Salmonella Newport also showed a broader distribution and stronger antibiotic-resistance capacity, particularly due to genes such as mphA and ramA. Our pangenome analysis showed a predominance of cell maintenance and survival-process genes in the accessory genome of both serotypes. Considering unique genes, Salmonella Anatum and Newport showed a notorious abundance of genes with functions related to replication, recombination, and repair. The substantial rise of Anatum and Newport strains in meat samples for human consumption presents an epidemiological alert, highlighting the critical need for stringent surveillance programs to mitigate human and ecosystem health risks.}, } @article {pmid39770679, year = {2024}, author = {Mwamburi, SM and Islam, SI and Dinh-Hung, N and Dangsawat, O and Sowanpreecha, R and Khang, LTP and Montha, N and Therdtatha, P and Dwinanti, SH and Permpoonpattana, P and Linh, NV}, title = {Genomic Characterization of Bacillus sp. THPS1: A Hot Spring-Derived Species with Functional Features and Biotechnological Potential.}, journal = {Microorganisms}, volume = {12}, number = {12}, pages = {}, doi = {10.3390/microorganisms12122476}, pmid = {39770679}, issn = {2076-2607}, abstract = {Bacillus sp. THPS1 is a novel strain isolated from a high-temperature hot spring in Thailand, exhibiting distinctive genomic features that enable adaptation to an extreme environment. This study aimed to characterize the genomic and functional attributes of Bacillus sp. THPS1 to understand its adaptation strategies and evaluate its potential for biotechnological applications. The draft genome is 5.38 Mbp with a GC content of 35.67%, encoding 5606 genes, including those linked to stress response and sporulation, which are essential for survival in high-temperature conditions. Phylogenetic analysis and average nucleotide identity (ANI) values confirmed its classification as a distinct species within the Bacillus genus. 
Pangenome analysis involving 19 other closely related thermophilic Bacillus species identified 1888 singleton genes associated with heat resistance, sporulation, and specialized metabolism, suggesting adaptation to nutrient-deficient, high-temperature environments. Genomic analysis revealed 12 biosynthetic gene clusters (BGCs), including those for polyketides and non-ribosomal peptides, highlighting its potential for synthesizing secondary metabolites that may facilitate its adaptation. Additionally, the presence of three Siphoviridae phage regions and 96 mobile genetic elements (MGEs) suggests significant genomic plasticity, whereas the existence of five CRISPR arrays implies an advanced defense mechanism against phage infections, contributing to genomic stability. The distinctive genomic features and functional capacities of Bacillus sp. THPS1 make it a promising candidate for biotechnological applications, particularly in the production of heat-stable enzymes and the development of resilient bioformulations.}, } @article {pmid39768397, year = {2024}, author = {Klaysubun, C and Chaichana, N and Suwannasin, S and Singkhamanan, K and Yaikhan, T and Kantachote, D and Pomwised, R and Wonglapsuwan, M and Surachat, K}, title = {Genomic Characterization of Probiotic Purple Nonsulfur Bacteria Cereibacter sphaeroides Strains S3W10 and SS15: Implications for Enhanced Shrimp Aquaculture.}, journal = {Life (Basel, Switzerland)}, volume = {14}, number = {12}, pages = {}, doi = {10.3390/life14121691}, pmid = {39768397}, issn = {2075-1729}, support = {B13F660074 and B13F670076//NSRF via the Program Management Unit for Human Resources and Institutional Development, Research and Innovation/ ; }, abstract = {Cereibacter sphaeroides strains S3W10 and SS15, isolated from shrimp ponds, exhibit potential probiotic benefits for aquaculture. 
In this study, the genomic features of S3W10 and SS15 were thoroughly characterized to evaluate their probiotic properties and safety for aquaculture use. The genomes of S3W10 and SS15 consist of 130 and 74 contigs, with sizes of 4.6 Mb and 4.4 Mb and GC contents of 69.2%. Average nucleotide identity (ANI), digital DNA-DNA hybridization (dDDH), and phylogenomic analyses confirmed that these strains belong to C. sphaeroides. Genome annotation predicted 4260 coding sequences (CDS) in S3W10 and 4086 CDS in SS15, including genes associated with stress tolerance, nutrient absorption, and antioxidant activity. Notably, genes related to vitamin B12 synthesis, digestive enzyme production, and carotenoid biosynthesis, which support shrimp health, were identified in both genomes. CAZyme analysis identified 116 and 115 carbohydrate-active enzymes in S3W10 and SS15, respectively, supporting adaptation to gastrointestinal environments and the host immune response. Pan-genome analysis across C. sphaeroides strains revealed 7918 gene clusters, highlighting the open pan-genome structure of this species and its high genetic diversity. Further bioinformatic analyses assessing mobile genetic elements, antibiotic-resistance genes, and virulence factors demonstrated the safety of both strains for aquaculture, as no plasmids or virulence genes were identified. The genomic insights in this study provide a deeper understanding of the strains' adaptability and functional potential, aligning with previous in vitro and in vivo studies and highlighting their potential for use in shrimp cultivation.}, } @article {pmid39768396, year = {2024}, author = {Adedibu, PA and Son, O and Tekutyeva, L and Balabanova, L}, title = {Pathogenomic Insights into Xanthomonas oryzae pv. 
oryzae's Resistome, Virulome, and Diversity for Improved Rice Blight Management.}, journal = {Life (Basel, Switzerland)}, volume = {14}, number = {12}, pages = {}, doi = {10.3390/life14121690}, pmid = {39768396}, issn = {2075-1729}, support = {15.BRK.21.0004 (Contract No. 075-15-2021-1052/9)//Ministry of Science and Higher Education of Russian Federation/ ; }, abstract = {Oryza sativa (rice) is a major staple food targeted for increased production to achieve food security. However, increased production is threatened by several biotic and abiotic factors, of which bacterial blight disease caused by Xanthomonas oryzae pathovar oryzae is severe. Developing effective control strategies requires an up-to-date understanding of its pathogenomics. This study analyzes the genomes of 30 X. oryzae strains collected from rice-producing regions across five continents to identify genetic elements critical for its pathogenicity and adaptability and for an intraspecific diversity assessment using advanced genomics and bioinformatics tools. Resistome analysis revealed 28 distinct types of antibiotic resistance genes (ARGs), both innate and acquired, indicating a growing threat from multidrug-resistant X. oryzae strains. Sixteen virulent genes, including type III and VI secretion systems, motility genes, and effector proteins, were identified. A unique 'MexCD-OprJ' multidrug efflux system was detected in the Tanzanian strains, conferring resistance to multiple antibiotic classes. To curb further ARG emergence, there is a need to regulate the use of antibiotics for X. oryzae control and adopt resistant rice varieties. Transposable elements were also discovered to contribute to X. oryzae pathogenicity, facilitating the horizontal transfer of virulence genes. Pangenome analysis revealed intraspecific variation among the population, with 112 unique CDS having diverse functional roles. Strains registered in the Philippines had the most unique genes. 
Phylogenetic analysis confirmed the divergent evolution of X. oryzae. This study's results will aid in identifying more effective management strategies and biocontrol alternatives for sustainable rice production.}, } @article {pmid39766787, year = {2024}, author = {Muto, Y and Tanaka, K}, title = {Comparative Evolutionary Genomics Reveals Genetic Diversity and Differentiation in Bacteroides fragilis.}, journal = {Genes}, volume = {15}, number = {12}, pages = {}, doi = {10.3390/genes15121519}, pmid = {39766787}, issn = {2073-4425}, support = {22K10486//Japan Society for the Promotion of Science/ ; }, mesh = {*Bacteroides fragilis/genetics/classification ; *Phylogeny ; *Genome, Bacterial ; *Evolution, Molecular ; *Genetic Variation ; *Genomics/methods ; Bacteroides Infections/microbiology/genetics ; }, abstract = {BACKGROUND/OBJECTIVES: Bacteroides fragilis is the pathogenic anaerobe most commonly isolated from intra-abdominal infections, abscesses, and blood. Despite its clinical importance, research on its pan-genome-scale evolution is still limited.

METHODS: Herein, we analyzed the pan-genome architecture of 374 B. fragilis strains to explore their intra-species genomic diversity and evolutionary patterns.

RESULTS: Our analysis revealed an open pan-genome with a high proportion of accessory genomes, indicating high genetic variability. Accessory genome genes were substantially enriched in the functions of "Replication, Recombination, and Repair" suggesting their roles in gene transfer and divergence. Phylogenomic analysis divided B. fragilis into two distinct clades: divisions I and II, differing in gene content, antimicrobial resistance genes, and mobile genetic elements. Division II revealed higher Tajima's D values, suggesting that it separated after B. fragilis's recent species diversification. The extreme shift in the distribution of gene-wise Hudson's fixation index (Fst) values for each division suggested that several genes are highly differentiated or evolved between the two clades. Average nucleotide identity and 16S rRNA analyses showed that B. fragilis division II represents a distinct species, Bacteroides hominis. Additionally, a considerable depletion of recombination in genes with Fst values > 0.99 was noted, suggesting that the highest Fst genes with little recombination are the basis for differentiation between divisions.

CONCLUSIONS: Overall, this study enhances the understanding of B. fragilis's genomic diversity, evolutionary dynamics, and potential role in pathogenesis, shedding light on its adaptation and diversification.}, } @article {pmid39763808, year = {2024}, author = {Ojeda, IG and Palace, SG and Martinez, PP and Azarian, T and Grant, LR and Hammitt, LL and Hanage, WP and Lipsitch, M}, title = {Linkage-based ortholog refinement in bacterial pangenomes with CLARC.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.12.18.629228}, pmid = {39763808}, issn = {2692-8205}, abstract = {Bacterial genomes exhibit significant variation in gene content and sequence identity. Pangenome analyses explore this diversity by classifying genes into core and accessory clusters of orthologous groups (COGs). However, strict sequence identity cutoffs can misclassify divergent alleles as different genes, inflating accessory gene counts. CLARC (Connected Linkage and Alignment Redefinition of COGs) [ https://github.com/IndraGonz/CLARC ] improves pangenome analyses by condensing accessory COGs using functional annotation and linkage information. Through this approach, orthologous groups are consolidated into more practical units of selection. Analyzing 8,000+ Streptococcus pneumoniae genomes, CLARC reduced accessory gene estimates by more than 30% and improved evolutionary predictions based on accessory gene frequencies. 
By refining COG definitions, CLARC offers critical insights into bacterial evolution, aiding genetic studies across diverse populations.}, } @article {pmid39763645, year = {2024}, author = {Zhou, Q and Ghezelji, M and Hari, A and Ford, MKB and Holley, C and Sahinalp, SC and Numanagić, I}, title = {Geny: a genotyping tool for allelic decomposition of killer cell immunoglobulin-like receptor genes.}, journal = {Frontiers in immunology}, volume = {15}, number = {}, pages = {1494995}, pmid = {39763645}, issn = {1664-3224}, mesh = {*Receptors, KIR/genetics ; Humans ; *Alleles ; *Genotyping Techniques/methods ; *Genotype ; High-Throughput Nucleotide Sequencing/methods ; Computational Biology/methods ; Software ; }, abstract = {INTRODUCTION: Accurate genotyping of Killer cell Immunoglobulin-like Receptor (KIR) genes plays a pivotal role in enhancing our understanding of innate immune responses, disease correlations, and the advancement of personalized medicine. However, due to the high variability of the KIR region and high level of sequence similarity among different KIR genes, the generic genotyping workflows are unable to accurately infer copy numbers and complete genotypes of individual KIR genes from next-generation sequencing data. Thus, specialized genotyping tools are needed to genotype this complex region.

METHODS: Here, we introduce Geny, a new computational tool for precise genotyping of KIR genes. Geny utilizes available KIR allele databases and proposes a novel combination of expectation-maximization filtering schemes and integer linear programming-based combinatorial optimization models to resolve ambiguous reads, provide accurate copy number estimation, and estimate the correct allele of each copy of genes within the KIR region.

RESULTS & DISCUSSION: We evaluated Geny on a large set of simulated short-read datasets covering the known validated KIR region assemblies and a set of Illumina short-read samples sequenced from 40 validated samples from the Human Pangenome Reference Consortium collection and showed that it outperforms the existing state-of-the-art KIR genotyping tools in terms of accuracy, precision, and recall. We envision Geny becoming a valuable resource for understanding immune system response and consequently advancing the field of patient-centric medicine.}, } @article {pmid39759836, year = {2024}, author = {Nesbø, CL and Kublanov, I and Yang, M and Sharan, AA and Meyer, T and Edwards, EA}, title = {High quality Bathyarchaeia MAGs from lignocellulose-impacted environments elucidate metabolism and evolutionary mechanisms.}, journal = {ISME communications}, volume = {4}, number = {1}, pages = {ycae156}, pmid = {39759836}, issn = {2730-6151}, abstract = {The archaeal class Bathyarchaeia is widely and abundantly distributed in anoxic habitats. Metagenomic studies have suggested that they are mixotrophic, capable of CO2 fixation and heterotrophic growth, and involved in acetogenesis and lignin degradation. We analyzed 35 Bathyarchaeia metagenome-assembled genomes (MAGs), including the first complete circularized MAG (cMAG) of the Bathy-6 subgroup, from the metagenomes of three full-scale pulp and paper mill anaerobic digesters and three laboratory methanogenic enrichment cultures maintained on pre-treated poplar. Thirty-three MAGs belong to the Bathy-6 lineage, while two are from the Bathy-8 lineage. In our previous analysis of the microbial community in the pulp mill digesters, Bathyarchaeia were abundant and positively correlated to hydrogenotrophic and methylotrophic methanogenesis. Several factors likely contribute to the success of the Bathy-6 lineage compared to Bathy-8 in the reactors. 
The Bathy-6 genomes are larger than those of Bathy-8 and have more genes involved in lignocellulose degradation, including carbohydrate-active enzymes not present in the Bathy-8. Bathy-6 also shares the Bathyarchaeal O-demethylase system recently identified in Bathy-8. All the Bathy-6 MAGs had numerous membrane-associated pyrroloquinoline quinone-domain proteins that we suggest are involved in lignin modification or degradation, together with Radical-S-adenosylmethionine (SAM) and Rieske domain proteins, and AA2, AA3, and AA6-family oxidoreductases. We also identified a complete B12 synthesis pathway and a complete nitrogenase gene locus. Finally, comparative genomic analyses revealed that Bathyarchaeia genomes are dynamic and have interacted with other organisms in their environments through gene transfer to expand their gene repertoire.}, } @article {pmid39758981, year = {2024}, author = {Zakeri, M and Brown, NK and Ahmed, OY and Gagie, T and Langmead, B}, title = {Movi: A fast and cache-efficient full-text pangenome index.}, journal = {iScience}, volume = {27}, number = {12}, pages = {111464}, pmid = {39758981}, issn = {2589-0042}, abstract = {Pangenome indexes are promising tools for many applications, including classification of nanopore sequencing reads. Move structure is a compressed-index data structure based on the Burrows-Wheeler Transform (BWT). It offers simultaneous O(1)-time queries and O(r) space, where r is the number of BWT runs (consecutive sequence of identical characters). We developed Movi based on the move structure for indexing and querying pangenomes. Movi scales very well for repetitive text as its size grows strictly by r. Movi computes sophisticated matching queries for classification such as pseudo-matching lengths and backward search up to 30 times faster than existing methods by minimizing the number of cache misses and using memory prefetching to attain a degree of latency hiding. 
Movi's fast constant-time query loop makes it well suited to real-time applications like adaptive sampling for nanopore sequencing, where decisions must be made in a small and predictable time interval.}, } @article {pmid39756800, year = {2025}, author = {Jonkheer, EM and de Ridder, D and van der Lee, TAJ and de Haan, JR and Berke, L and Smit, S}, title = {Exploring intra- and intergenomic variation in haplotype-resolved pangenomes.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14545}, pmid = {39756800}, issn = {1467-7652}, support = {ETEC.2019.019//Netherlands eScience Center/ ; //Genetwister Technologies B.V./ ; //Biointeractions and Plant Health, Wageningen Plant Research/ ; }, abstract = {With advances in long-read sequencing and assembly techniques, haplotype-resolved (phased) genome assemblies are becoming more common, also in the field of plant genomics. Computational tools to effectively explore these phased genomes, particularly for polyploid genomes, are currently limited. Here we describe a new strategy adopting a pangenome approach. To analyse both intra- and intergenomic variation in phased genome assemblies, we have made the software package PanTools ploidy-aware by updating the pangenome graph representation and adding several novel functionalities to assess synteny and gene retention, profile repeats and calculate synonymous and nonsynonymous mutation rates. Using PanTools, we constructed and analysed a pangenome comprising of one diploid and four tetraploid potato cultivars, and a pangenome of five diploid apple species. Both pangenomes show high intra- and intergenomic allelic diversity in terms of gene absence/presence, SNPs, indels and larger structural variants. Our findings show that the new functionalities and visualizations are useful to discover introgressions and detect likely misassemblies in phased genomes. 
PanTools is available at https://git.wur.nl/bioinformatics/pantools.}, } @article {pmid39755794, year = {2025}, author = {Sawaswong, V and Wongjarit, K and Petsong, S and Yuliani, Y and Somsukpiroh, U and Faksri, K and Forde, T and Payungporn, S and Rotcheewaphan, S}, title = {Diversity and antimicrobial resistance profiles of Mycobacterium avium complex clinical isolates in Thailand based on whole genome comparative analysis.}, journal = {Scientific reports}, volume = {15}, number = {1}, pages = {772}, pmid = {39755794}, issn = {2045-2322}, support = {HEA663000042//Thailand Science research and Innovation Fund Chulalongkorn University/ ; RA66/026//Ratchadapiseksompotch Fund, Faculty of Medicine, Chulalongkorn University/ ; }, mesh = {Thailand/epidemiology ; *Mycobacterium avium Complex/genetics/drug effects/isolation & purification ; Humans ; *Phylogeny ; *Microbial Sensitivity Tests ; *Drug Resistance, Bacterial/genetics ; *Mycobacterium avium-intracellulare Infection/microbiology/drug therapy ; *Genome, Bacterial ; Anti-Bacterial Agents/pharmacology ; Whole Genome Sequencing/methods ; Genetic Variation ; Male ; Female ; Middle Aged ; Aged ; Adult ; }, abstract = {The Mycobacterium avium complex (MAC) is a group of closely related nontuberculous mycobacteria that can cause various diseases in humans. In this study, genome sequencing, comprehensive genomic analysis, and antimicrobial susceptibility testing of 66 MAC clinical isolates from King Chulalongkorn Memorial Hospital, Bangkok, Thailand were carried out. Whole-genome average nucleotide identity (ANI) revealed the MAC species distribution, comprising 54 (81.8%) M. intracellulare, 6 (9.1%) M. avium, 5 (7.6%) M. colombiense, and 1 (1.5%) M. timonense. Phylogenetic analysis revealed a high diversity of M. intracellulare isolates and their evolutionary relationships which could be divided into 2 subspecies: M. intracellulare subsp. intracellulare and M. intracellulare subsp. chimaera. In addition, M. 
intracellulare subsp. chimaera mostly clustered in the distinct clades separated from M. intracellulare strains originating from other countries. Most MAC isolates were resistant to linezolid and moxifloxacin based on phenotypic antimicrobial susceptibility testing. Mutations within rrl gene associated with clarithromycin resistance were detected in M. intracellulare and M. colombiense. The pan-genome analysis presented clade-specific proteins for M. intracellulare, such as PE and PPE protein families. This study provides valuable insights into the genomic diversity and antimicrobial resistance profiles of MAC isolates circulating in Thailand, which are useful for clinical management, guiding the development of targeted diagnostic, and treatment strategies for MAC infections.}, } @article {pmid39754036, year = {2025}, author = {Pan, M and O'Flaherty, S and Hibberd, A and Gerdes, S and Morovic, W and Barrangou, R}, title = {The curated Lactobacillus acidophilus NCFM genome provides insights into strain specificity and microevolution.}, journal = {BMC genomics}, volume = {26}, number = {1}, pages = {1}, pmid = {39754036}, issn = {1471-2164}, abstract = {BACKGROUND: The advent of next generation sequencing technologies has enabled a surge in the number of whole genome sequences in public databases, and our understanding of the composition and evolution of bacterial genomes. Besides model organisms and pathogens, some attention has been dedicated to industrial bacteria, notably members of the Lactobacillaceae family that are commonly studied and formulated as probiotic bacteria. Of particular interest is Lactobacillus acidophilus NCFM, an extensively studied strain that has been widely commercialized for decades and is being used for the delivery of vaccines and therapeutics.

RESULTS: Here, we revisit the L. acidophilus genome, which was sequenced twenty years ago, and determined the core and pan genomes of 114 publicly available L. acidophilus strains, spanning commercial isolates, academic strains and clones from the scientific literature. Results indicate a predictable high level of homogeneity within the species, but also reveal surprising mis-assemblies. Furthermore, by investigating twenty one available L. acidophilus NCFM-derived variants, we document overall genomic stability, with no observed genomic re-arrangement or inversions.

CONCLUSION: This study provides a comparative analysis of the currently available genomes for L. acidophilus and examines microevolution patterns for several strains derived from L. acidophilus NCFM, which revealed no to very few SNPs with strains sequenced at different points in time using different sequencing technologies and platforms. This re-affirms its suitability for industrial deployment as a probiotic and its use as an engineering chassis and delivery modality for novel biotherapeutics.}, } @article {pmid39752324, year = {2025}, author = {van Dijk, LR and Manson, AL and Earl, AM and Garimella, KV and Abeel, T}, title = {Fast exact gap-affine partial order alignment with POASTA.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae757}, pmid = {39752324}, issn = {1367-4811}, abstract = {MOTIVATION: Partial order alignment is a widely used method for computing multiple sequence alignments, with applications in genome assembly and pangenomics, among many others. Current algorithms to compute the optimal, gap-affine partial order alignment do not scale well to larger graphs and sequences. While heuristic approaches exist, they do not guarantee optimal alignment and sacrifice alignment accuracy.

RESULTS: We present POASTA, a new optimal algorithm for partial order alignment that exploits long stretches of matching sequence between the graph and a query. We benchmarked POASTA against the state-of-the-art on several diverse bacterial gene datasets and demonstrated an average speed-up of 4.1x and up to 9.8x, using less memory. POASTA's memory scaling characteristics enabled the construction of much larger POA graphs than previously possible, as demonstrated by megabase-length alignments of 342 Mycobacterium tuberculosis sequences.

POASTA is available on Github at https://github.com/broadinstitute/poasta.}, } @article {pmid39749377, year = {2024}, author = {Oh, JY and Do, KH and Jeong, JH and Kwak, S and Choe, S and An, D and Chae, JC and Lee, K and Seo, KW}, title = {Whole genome sequencing analysis of enteropathogenic Escherichia coli from human and companion animals in Korea.}, journal = {Journal of veterinary science}, volume = {}, number = {}, pages = {}, doi = {10.4142/jvs.24225}, pmid = {39749377}, issn = {1976-555X}, support = {2022-ER2103-01/KDCA/Korea Disease Control and Prevention Agency/Korea ; 2024-ER2103-00/KDCA/Korea Disease Control and Prevention Agency/Korea ; 23194MFDS012/MFDS/Ministry of Food and Drug Safety/Korea ; }, abstract = {IMPORTANCE: This study is essential for comprehending the zoonotic transmission, antimicrobial resistance, and genetic diversity of enteropathogenic Escherichia coli (EPEC).

OBJECTIVE: To improve our understanding of EPEC, this study focused on analyzing and comparing the genomic characteristics of EPEC isolates from humans and companion animals in Korea.

METHODS: The whole genome of 26 EPEC isolates from patients with diarrhea and 20 EPEC isolates from companion animals in Korea were sequenced using the Illumina HiSeq X (Illumina, USA) and Oxford Nanopore MinION (Oxford Nanopore Technologies, UK) platforms.

RESULTS: Most isolates were atypical EPEC, and did not harbor the bfpA gene. The most prevalent virulence genes were found to be ompT (humans: 61.5%; companion animals: 60.0%) followed by lpfA (humans: 46.2%; companion animals: 60.0%). Although pan-genome analyses showed no apparent correlation among the origin of the strains, virulence profiles, and antimicrobial resistance profiles, isolates included in clade A obtained from both humans and companion animals exhibited high similarity. Additionally, all the isolates included in clade A encoded the ompT gene and did not encode the hlyE gene. The two isolates from companion animals harbored an incomplete bundle-forming pilus region encoding bfpA and bfpB. Moreover, the type IV secretion system-associated genes tra and trb were found in the bfpA-encoding isolates from humans.

CONCLUSIONS AND RELEVANCE: Whole-genome sequencing enabled a more accurate analysis of the phylogenetic structure of EPEC and provided better insights into the understanding of EPEC epidemiology and pathogenicity.}, } @article {pmid39748179, year = {2025}, author = {Michael, TP}, title = {Can a plant biologist fix a thermostat?}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.20382}, pmid = {39748179}, issn = {1469-8137}, support = {INV-040541/GATES/Bill & Melinda Gates Foundation/United States ; }, abstract = {The shift to reductionist biology at the dawn of the genome era yielded a 'parts list' of plant genes and a nascent understanding of complex biological processes. Today, with the genomics era in full swing, advances in high-definition genomics enabled precise temporal and spatial analyses of biological systems down to the single-cell level. These insights, coupled with artificial intelligence-driven in silico design, are propelling the development of the first synthetic plants. 
By integrating reductionist and systems approaches, researchers are not only reimagining plants as sources of food, fiber, and fuel but also as 'environmental thermostats' capable of mitigating the impacts of a changing climate.}, } @article {pmid39748007, year = {2025}, author = {Islam, MR and Mondol, SM and Hossen, MA and Khatun, MP and Selim, S and {Amiruzzaman} and Gomes, DJ and Rahaman, MM}, title = {First report on comprehensive genomic analysis of a multidrug-resistant Enterobacter asburiae isolated from diabetic foot infection from Bangladesh.}, journal = {Scientific reports}, volume = {15}, number = {1}, pages = {424}, pmid = {39748007}, issn = {2045-2322}, mesh = {*Diabetic Foot/microbiology ; *Enterobacter/genetics/isolation & purification/drug effects/pathogenicity ; Humans ; Bangladesh ; *Drug Resistance, Multiple, Bacterial/genetics ; *Anti-Bacterial Agents/pharmacology ; Genome, Bacterial ; Genomics/methods ; Whole Genome Sequencing ; Microbial Sensitivity Tests ; Virulence Factors/genetics ; Phylogeny ; Enterobacteriaceae Infections/microbiology ; Biofilms/drug effects ; }, abstract = {Enterobacter asburiae (E. asburiae) is a gram-negative rod-shaped bacterium which has emerging significance as an opportunistic pathogen having high virulence pattern and drug resistant properties. In this study, we present the detailed analysis of the whole genome sequence of a multidrug-resistant (MDR) E. asburiae strain BDW1M3 from Bangladesh. The isolate was collected from an infected foot wound of a diabetic foot ulcer patient. Through sophisticated genomic techniques encompassing whole genome sequencing and in-depth bioinformatic analyses, this research unveils a profound understanding of the isolate's antimicrobial resistance patterns, virulence determinants, biosynthetic gene clusters, metabolic pathways and pathogenic potential. 
The isolate displayed resistance to Ampicillin, Fosfomycin, Cefoxitin, Tigecycline, Meropenem, Linezolid, Vancomycin antibiotics and demonstrated the capacity for biofilm formation. Several antimicrobial resistance genes such as blaACT-2, fosA2, baeR, qnrE2, vanA and numbers of virulence genes including ybaJ, csrA, barA, uvrY, pgaD, hlyD, hlyC, terC, purD were detected. Metal resistance genes investigation revealed the presence of cusCFBA operon system, and many other genes including zntA, zitB, czrB. Prophage region of Myoviridae was detected. Comparative genomics with 47 whole genome sequence (n = 47) shed light on the genetic diversity of E. asburiae strains from diverse sources and countries, with a notable observation that strains from both human and non-human origins exhibited significant pathogenicity potential, genomic and phylogenomic relations hinting at potential cross-species transmission. Pangenome analysis indicated toward an expanding pangenome of E. asburiae. Further research and in-depth comprehensive studies are required to investigate the prevalence of E. 
asburiae in Bangladesh and emphasize towards unraveling the bacterium's inherent pathogenic potential and the intricate molecular mechanisms that underlie its resistance traits and virulence properties.}, } @article {pmid39746989, year = {2025}, author = {Hu, H and Zhao, J and Thomas, WJW and Batley, J and Edwards, D}, title = {The role of pangenomics in orphan crop improvement.}, journal = {Nature communications}, volume = {16}, number = {1}, pages = {118}, pmid = {39746989}, issn = {2041-1723}, support = {LP230100351//Department of Education and Training | Australian Research Council (ARC)/ ; DP210100296//Department of Education and Training | Australian Research Council (ARC)/ ; DP200100762//Department of Education and Training | Australian Research Council (ARC)/ ; }, mesh = {*Crops, Agricultural/genetics ; *Genomics/methods ; *Genome, Plant ; *Plant Breeding/methods ; Gene Editing/methods ; Genetic Variation ; Food Security ; }, abstract = {Global food security depends heavily on a few staple crops, while orphan crops, despite being less studied, offer the potential benefits of environmental adaptation and enhanced nutritional traits, especially in a changing climate. Major crops have benefited from genomics-based breeding, initially using single genomes and later pangenomes. Recent advances in DNA sequencing have enabled pangenome construction for several orphan crops, offering a more comprehensive understanding of genetic diversity. 
Orphan crop research has now entered the pangenomics era and applying these pangenomes with advanced selection methods and genome editing technologies can transform these neglected species into crops of broader agricultural significance.}, } @article {pmid39745367, year = {2024}, author = {Chauhan, SM and Ardalani, O and Hyun, JC and Monk, JM and Phaneuf, PV and Palsson, BO}, title = {Decomposition of the pangenome matrix reveals a structure in gene distribution in the Escherichia coli species.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0053224}, doi = {10.1128/msphere.00532-24}, pmid = {39745367}, issn = {2379-5042}, abstract = {UNLABELLED: Thousands of complete genome sequences for strains of a species that are now available enable the advancement of pangenome analytics to a new level of sophistication. We collected 2,377 publicly available complete genomes of Escherichia coli for detailed pangenome analysis. The core genome and accessory genomes consisted of 2,398 and 5,182 genes, respectively. We developed a machine learning approach to define the accessory genes characterizing the major phylogroups of E. coli plus Shigella: A, B1, B2, C, D, E, F, G, and Shigella. The analysis resulted in a detailed structure of the genetic basis of the phylogroups' differential traits. This pangenome structure was largely consistent with a housekeeping-gene-based MLST distribution, sequence-based Mash distance, and the Clermont quadruplex classification. The rare genome (consisting of genes found in <6.8% of all strains) consisted of 163,619 genes, about 79% of which represented variations of 315 underlying transposon elements. This analysis generated a mathematical definition of the genetic basis for a species.

IMPORTANCE: The comprehensive analysis of the pangenome of Escherichia coli presented in this study marks a significant advancement in understanding bacterial genetic diversity. By employing machine learning techniques to analyze 2,377 complete E. coli genomes, the study provides a detailed mapping of core, accessory, and rare genes. This approach reveals the genetic basis for differential traits across phylogroups, offering insights into pathogenicity, antibiotic resistance, and evolutionary adaptations. The findings enhance the potential for genome-based diagnostics and pave the way for future studies aimed at achieving a global genetic definition of bacterial phylogeny.}, } @article {pmid39745363, year = {2024}, author = {Akusobi, C and Choudhery, S and Benghomari, BS and Wolf, ID and Singhvi, S and Ioerger, TR and Rubin, EJ}, title = {Transposon-sequencing across multiple Mycobacterium abscessus isolates reveals significant functional genomic diversity among strains.}, journal = {mBio}, volume = {}, number = {}, pages = {e0337624}, doi = {10.1128/mbio.03376-24}, pmid = {39745363}, issn = {2150-7511}, abstract = {UNLABELLED: Mycobacterium abscessus (Mab) is a clinically significant pathogen and a highly genetically diverse species due to its large accessory genome. The functional consequence of this diversity remains unknown mainly because, to date, functional genomic studies in Mab have been primarily performed on reference strains. Given the growing public health threat of Mab infections, understanding the functional genomic differences among Mab clinical isolates can provide more insight into how its genetic diversity influences gene essentiality, clinically relevant phenotypes, and importantly, potential drug targets. To determine the functional genomic diversity among Mab strains, we conducted transposon-sequencing (TnSeq) on 21 genetically diverse clinical isolates, including 15 M. abscessus subsp. abscessus isolates and 6 M. abscessus subsp. 
massiliense isolates, cataloging all the essential and non-essential genes in each strain. Pan-genome analysis revealed a core set of 3,845 genes and a large accessory genome of 11,507. We identified 259 core essential genes across the 21 clinical isolates and 425 differentially required genes, representing ~10% of the Mab core genome. We also identified genes whose requirements were subspecies, lineage, and isolate-specific. Finally, by correlating TnSeq profiles, we identified 19 previously uncharacterized genetic networks in Mab. Altogether, we find that Mab clinical isolates are not only genetically diverse but functionally diverse as well.

IMPORTANCE: This study investigates the genetic diversity of Mycobacterium abscessus (Mab), a bacteria known for causing difficult-to-treat infections. Researchers performed transposon-sequencing (TnSeq) on 21 different clinical isolates of Mab to identify essential and non-essential genes in each strain. Through this analysis, they identified core genes required for growth across all strains. Interestingly, they also identified genes whose requirement for growth or "essentiality" were subspecies, lineage, and isolate-specific. This study reveals that Mab's genetic diversity translates into significant functional differences among clinical isolates. Insights from this paper lay essential groundwork for future studies exploring the biological and clinical implications of genetic diversity in Mab clinical isolates. Understanding this diversity could guide targeted therapies and offer new insights into managing infections caused by Mab, a growing public health concern.}, } @article {pmid39742300, year = {2024}, author = {Li, D and Wang, Y and Yuan, T and Cao, M and He, Y and Zhang, L and Li, X and Jiang, Y and Li, K and Sun, J and Lv, G and Su, G and Wang, Q and Pan, Y and Li, X and Jiang, Y and Yang, G and Groenen, MAM and Derks, MFL and Ding, R and Ding, X and Yu, T}, title = {Pangenome and genome variation analyses of pigs unveil genomic facets for their adaptation and agronomic characteristics.}, journal = {iMeta}, volume = {3}, number = {6}, pages = {e257}, pmid = {39742300}, issn = {2770-596X}, abstract = {The development of a comprehensive pig graph pangenome assembly encompassing 27 genomes represents the most extensive collection of pig genomic data to date. Analysis of this pangenome reveals the critical role of structural variations in driving adaptation and defining breed-specific traits. Notably, the study identifies BTF3 as a key candidate gene governing intramuscular fat deposition and meat quality in pigs. 
These findings underscore the power of pangenome approaches in uncovering novel genomic features underlying economically important agricultural traits. Collectively, these results demonstrate the value of leveraging large-scale, multi-genome analyses for advancing our understanding of livestock genomes and accelerating genetic improvement.}, } @article {pmid39741590, year = {2024}, author = {Banerjee, R and Chaudhari, NM and Lahiri, A and Gautam, A and Bhowmik, D and Dutta, C and Chattopadhyay, S and Huson, DH and Paul, S}, title = {Corrigendum: Interplay of various evolutionary modes in genome diversification and adaptive evolution of the family Sulfolobaceae.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1538738}, doi = {10.3389/fmicb.2024.1538738}, pmid = {39741590}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2021.639995.].}, } @article {pmid39738919, year = {2024}, author = {Han, JE and Kang, S and Lee, SY and Bae, JW}, title = {Characterisation of Aequorivita ciconiae sp. nov., isolated from oriental stork, Ciconia boyciana.}, journal = {Antonie van Leeuwenhoek}, volume = {118}, number = {2}, pages = {47}, pmid = {39738919}, issn = {1572-9699}, support = {RS-2023-00227274//National Research Foundation of Korea/ ; NRF-2020R1A2C3012797//Mid-Career Researcher Programme/ ; 22213MFDS537//Ministry of Food and Drug Safety/ ; }, mesh = {*Phylogeny ; *RNA, Ribosomal, 16S/genetics ; *Flavobacteriaceae/genetics/classification/isolation & purification ; Animals ; *DNA, Bacterial/genetics ; *Base Composition ; *Fatty Acids/chemistry/analysis ; Republic of Korea ; *Bacterial Typing Techniques ; Birds/microbiology ; Feces/microbiology ; Sequence Analysis, DNA ; Phospholipids/analysis ; }, abstract = {A single novel bacterial strain designated as H23M31[T] was isolated from the faecal sample of oriental stork (Ciconia boyciana) that inhabits the Republic of Korea. 
It was a rod-shaped, facultative anaerobic, Gram-negative, and non-motile strain. Phylogenetic analysis based on the 16S rRNA gene sequence revealed that it branched from Aequorivita within Flavobacteriaceae. It was most closely related to A. capsosiphonis DSM 23843[T], which shared the sequence similarity of 96.36%. The strain exhibited optimal growth at pH 7.0 in a marine broth medium with 1% NaCl incubated at 30 °C. Chemotaxonomic characteristics indicated that the predominant cellular fatty acids were iso-C15:0 (24.4%), iso-C17:0 3-OH (15.9%), and anteiso-C15:0 (13.9%). The polar lipid of the strain contained phosphatidylcholine (PC), phosphatidylglycerol (PG), and diphosphatidylglycerol (DPG). The major isoprenoid quinone was menaquinone 6 (MK-6), which was identical with that of a closely related Aequorivita species. The genomic G + C contents of the strain was 38.25 mol%. Average nucleotide identity (ANI), average amino acid identity (AAI), and digital DNA-DNA hybridization (dDDH) values between the novel isolate and A. viscosa CGMCC 1.11023[T] were 75.83%, 80.34% and 20.50%, respectively. Phylogenetic analyses revealed the evolutionary relationships of the strain, demonstrating that the strain clusters with other Aequorivita species. Pan-genome analyses and genome comparisons indicated that, unlike other environmentally isolated species, it possesses unique genes that enhance its ability to colonise the harsh animal gut environment. Taxonomic characterisation suggested that strain H23M31[T] represents a novel Aequorivita species, and the proposed name is Aequorivita ciconiae sp. nov. The type strain of A. 
ciconiae is H23M31[T] (= KCTC 62809[T] = JCM 33229[T]).}, } @article {pmid39736996, year = {2024}, author = {Lu, W and Zha, B and Lyu, J and LingHu, C and Chen, J and Deng, S and Zhang, X and Li, L and Wang, G}, title = {Whole-genome sequencing and genomic analysis of four Akkermansia strains newly isolated from human feces.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1500886}, pmid = {39736996}, issn = {1664-302X}, abstract = {BACKGROUND: Numerous studies have demonstrated that Akkermansia is closely associated with human health. These bacteria colonize the mucus layer of the gastrointestinal tract and utilize mucin as their sole source of carbon and nitrogen. Akkermansia spp. exhibit potential as probiotics under specific conditions. However, the gene accumulation curve derived from pan-genome analysis suggests that the genome of Akkermansia strains remains open. Consequently, current genome mining efforts are insufficient to fully capture the intraspecific and interspecific characteristics of Akkermansia, necessitating continuous exploration of the genomic and phenotypic diversity of new isolates.

METHODS: Based on this finding, we sequenced, assembled, and functionally annotated the whole genomes of four new human isolates from our laboratory: AKK-HX001, AKK-HX002, AKK-HX003, and AKK-HX004.

RESULTS: Phylogenetic analysis revealed that all four isolates belonged to the AmII phylogroup, whereas the type strain DSM 22959 is classified within the AmI phylogroup. Moreover, 2,184 shared homologous genes were identified among the four isolates. Functional annotation using the COG, KEGG, and CAZy databases indicated that the functional genes of the four isolates were primarily associated with metabolism. Two antibiotic resistance genes were identified in AKK-HX001 and AKK-HX002, while three resistance genes were detected in AKK-HX003 and AKK-HX004. Additionally, each of the four isolates possessed two virulence genes and three pathogenicity genes, none of which were associated with pathogenicity. The prediction of mobile genetic elements indicated unequal distributions of GIs among the isolates, and a complete CRISPR system was identified in all isolates except AKK-HX003. Two annotated regions of secondary metabolite biosynthesis genes, both belonging to Terpene, were detected using the antiSMASH online tool.

CONCLUSION: These findings indicate that the four Akkermansia isolates, which belong to a phylogroup distinct from the model strain DSM 22959, exhibit lower genetic risk and may serve as potential probiotic resources for future research.}, } @article {pmid39736538, year = {2024}, author = {Radjasa, OK and Steven, R and Natanael, Y and Nugrahapraja, H and Radjasa, SK and Kristianti, T and Moeis, MR and Trinugroho, JP and Suharya, HB and Rachmatsyah, AO and Dwijayanti, A and Putri, MR and de Fretes, CE and Siallagan, ZL and Fadli, M and Opier, RDA and Farahyah, JD and Rahmawati, V and Rizanti, M and Humaira, Z and Prihatmanto, AS and Hananto, ND and Susanto, RD and Chahyadi, A and {Elfahmi} and Priharto, N and {Kamarisima} and Dwivany, FM}, title = {From the depths of the Java Trench: genomic analysis of Priestia flexa JT4 reveals bioprospecting and lycopene production potential.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {1259}, pmid = {39736538}, issn = {1471-2164}, mesh = {*Lycopene/metabolism ; Indonesia ; *Genome, Bacterial ; Bioprospecting ; Genomics/methods ; Phylogeny ; Multigene Family ; Peptide Synthases/genetics/metabolism ; Whole Genome Sequencing ; Carotenoids/metabolism ; Actinobacteria/genetics/metabolism ; }, abstract = {BACKGROUND: The marine environment boasts distinctive physical, chemical, and biological characteristics. While numerous studies have delved into the microbial ecology and biological potential of the marine environment, exploration of genetically encoded, deep-sea sourced secondary metabolites remains scarce. This study endeavors to investigate marine bioproducts derived from deep-sea water samples at a depth of 1,000 m in the Java Trench, Indonesia, utilizing both culture-dependent and whole-genome sequencing methods.

RESULTS: Our efforts led to the successful isolation and cultivation of a bacterium Priestia flexa JT4 from the water samples, followed by comprehensive genome sequencing. The resultant high-quality draft genome, approximately 4 Mb, harbored 5185 coding sequences (CDSs). Notably, 61.97% of these CDSs were inadequately characterized, presenting potential novel CDSs. This study is the first to identify the "open-type" (α < 1) pangenome within the genus Priestia. Moreover, our analysis uncovered eight biosynthetic gene clusters (BGCs) using the common genome mining pipeline, antiSMASH. Two non-ribosomal peptide synthetase (NRPS) BGCs within these clusters exhibited the potential to generate novel biological compounds. Noteworthy is the confirmation that the terpene BGC in P. flexa JT4 can produce lycopene, a compound in substantial industrial demand. The presence of lycopene in the P. flexa JT4 cells was verified using Ultra-performance liquid chromatography-mass spectrometry (UPLC-MS/MS) in multiple reaction modes.

CONCLUSIONS: This study highlights the bioprospecting opportunity to explore novel bioproducts and lycopene compounds from P. flexa JT4. It marks the pioneering exploration of deep-sea bacterium bioprospecting in Indonesia, seeking to unveil novel bioproducts and lycopene compounds through a genome mining approach.}, } @article {pmid39725890, year = {2024}, author = {Javier-López, R and Geliashvili, N and Birkeland, NK}, title = {Comparative genomics of Fervidobacterium: a new phylogenomic landscape of these wide-spread thermophilic anaerobes.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {1248}, pmid = {39725890}, issn = {1471-2164}, support = {328955//Norges Forskningsråd/ ; 328955//Norges Forskningsråd/ ; 862555//ERA-NET Cofund on Food Systems and Climate (FOSC)/ ; 862555//ERA-NET Cofund on Food Systems and Climate (FOSC)/ ; 862555//ERA-NET Cofund on Food Systems and Climate (FOSC)/ ; CPEA-LT-2017/10061//Norwegian Directorate for Higher Education and Skills/ ; CPEA-LT-2017/10061//Norwegian Directorate for Higher Education and Skills/ ; }, mesh = {*Phylogeny ; *Genome, Bacterial ; *Genomics ; RNA, Ribosomal, 16S/genetics ; Base Composition ; }, abstract = {BACKGROUND: Fervidobacterium is a genus of thermophilic anaerobic Gram-negative rod-shaped bacteria belonging to the phylum Thermotogota. They can grow through fermentation on a wide range of sugars and protein-rich substrates. Some can also break down feather keratin, which has significant biotechnological potential. Fervidobacteria genomes have undergone several horizontal gene transfer events, sharing DNA with unrelated microbial taxa. Despite increasing biotechnological and evolutionary interest in this genus, only seven species have been described to date. Here, we present and describe six new and complete Fervidobacterium genomes, including the type strains Fervidobacterium gondwanense CBS-1[T], F. islandicum H-21[T] and F. 
thailandense FC2004[T], one novel isolate from Georgia (strain GSH) and two strains (DSM 21710 and DSM 13770) that have not been previously described along with an evolutionary and phylogenomic analysis of the genus.

RESULTS: The complete genomes were around 2 Mb with approximately 2,000 CDS identified and annotated in each of them and a G + C content ranging from 38.9 mol% to 45.8 mol%. Phylogenomic comparisons of all currently available Fervidobacterium genomes, including OrthoANI and TYGS analyses, as well as a phylogenetic analysis based on the 16S rRNA gene, identified six species and nine subspecies clusters across the genus, with a consistent topology and a distant and separately branching species, Fervidobacterium thailandense. F. thailandense harbored the highest number of transposases, CRISPR clusters, pseudo genes and horizontally transferred regions. The pan genome of the genus showed that 44% of the genes belong to the cloud pangenome, with most of the singletons found also in F. thailandense.

CONCLUSIONS: The additional genome sequences described in this work and the comparison with all available Fervidobacterium genome sequences provided new insights into the evolutionary history of this genus and supported a phylogenetic reclassification. The phylogenomic results from OrthoANI and TYGS analyses revealed that F. riparium and F. gondwanense belong to the same genome species, and includes Fervidobacterium sp. 13770, while "F. pennivorans" strain DYC belongs to a separate genome species, whereas Fervidobacterium sp. 21710 and Fervidobacterium sp. GSH within the Fervidobacterium pennivorans clade represent two subspecies. F. changbaicum is reclassified as F. islandicum.}, } @article {pmid39724170, year = {2024}, author = {Farrell, AA and Nesbø, CL and Zhaxybayeva, O}, title = {Bacterial growth temperature as a horizontally acquired polygenic trait.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae277}, pmid = {39724170}, issn = {1759-6653}, abstract = {Evolutionary events leading to organismal preference for a specific growth temperature, as well as genes whose products are needed for a proper function at that temperature, are poorly understood. Using 64 bacteria from phylum Thermotogota as a model system, we examined how optimal growth temperature changed throughout Thermotogota history. We inferred that Thermotogota's last common ancestor was a thermophile and that some Thermotogota evolved the mesophilic and hyperthermophilic lifestyles secondarily. By modeling gain and loss of genes throughout Thermotogota history and by reconstructing their phylogenies, we demonstrated that adaptations to lower and higher growth temperature involve both the acquisition of necessary genes and loss of unnecessary genes. Via a pangenome-wide association study, we correlated presence/absence of 68 genes with specific optimal growth temperature intervals. 
While some of these genes are poorly characterized, most are involved in metabolism of amino acids, nucleotides, carbohydrates, and lipids, as well as in signal transduction and regulation of transcription. Most of the 68 genes have a history of horizontal gene transfer with other bacteria and archaea that often grow at similar temperatures, suggesting that parallel acquisitions of genes likely promote independent adaptations of different Thermotogota species to specific growth temperatures.}, } @article {pmid39722670, year = {2024}, author = {Yakubov, LA and Taranov, OS and Sidorov, SV and Nikonov, SD and Ostanin, AA and Chernykh, ER and Kolchanov, NA and Bogachev, SS}, title = {The concept of natural genome reconstruction. Part 1. Basic provisions of the "natural genome reconstruction" concept. Changing the genome of hematopoietic stem cells using several natural cellular mechanisms that are inherent in the hematopoietic cell and determine its biological status as "the source of the body's reparative potential".}, journal = {Vavilovskii zhurnal genetiki i selektsii}, volume = {28}, number = {7}, pages = {696--705}, doi = {10.18699/vjgb-24-78}, pmid = {39722670}, issn = {2500-0462}, abstract = {We present a series of articles proving the existence of a previously unknown mechanism of interaction between hematopoietic stem cells and extracellular double-stranded DNA (and, in particular, double-stranded DNA of the peripheral bloodstream), which explains the possibility of emergence and fixation of genetic information contained in double-stranded DNA of extracellular origin in hematopoietic stem cells. The concept of the possibility of stochastic or targeted changes in the genome of hematopoietic stem cells is formulated based on the discovery of new, previously unknown biological properties of poorly differentiated hematopoietic precursors. The main provisions of the concept are as follows. 
The hematopoietic stem cell takes up and internalizes fragments of extracellular double-stranded DNA via a natural mechanism. Specific groups of glycocalyx factors, including glycoproteins/proteoglycans, glycosylphosphatidylinositol-anchored proteins and scavenger receptors, take part in the internalization event. The binding sites for DNA fragments are heparin-binding domains and clusters of positively charged amino acid residues that are parts of protein molecules of these factors. Extracellular fragments delivered to the internal compartments of hematopoietic stem cells initiate terminal differentiation, colony formation, and proliferation of hematopoietic precursors. The molecular manifestation of these processes is the emergence and repair of pangenomic single-strand breaks. The occurrence of pangenomic single-strand breaks and restoration of genome (genomic DNA) integrity are associated with activation of a "recombinogenic situation" in the cell; during its active phase, stochastic homologous recombination or other recombination events between extracellular fragments localized in the nucleus and chromosomal DNA are possible. As a result, genetic material of initially extracellular localization either integrates into the recipient genome with the replacement of homologous chromosomal segments, or is transitively present in the nucleus and can manifest itself as a new genetic trait. It is assumed that as a result of stochastic acts of homologous exchange, chromosome loci are corrected in hematopoietic stem cells that have acquired mutations during the existence of the organism, which are the cause of clonal hematopoiesis associated with old age. In this regard, there is a fundamental possibility of changing the hematopoietic status of hematopoietic stem cells in the direction of polyclonality and the original diversity of clones. Such events can form the basis for the rejuvenation of the blood-forming cell system. 
The results of the laboratory's work indicate that other stem cells in the body capture extracellular DNA fragments too. This fact creates a paradigm for the overall rejuvenation of the body.}, } @article {pmid39722539, year = {2025}, author = {Zepeda-Rivera, MA and Eisele, Y and Baryiames, A and Wu, H and Mengoni, C and Piccinno, G and McMahon, EF and LaCourse, KD and Jones, DS and Hauner, H and Minot, SS and Segata, N and Dewhirst, FE and Johnston, CD and Bullman, S}, title = {Fusobacterium sphaericum sp. nov., isolated from a human colon tumor adheres to colonic epithelial cells and induces IL-8 secretion.}, journal = {Gut microbes}, volume = {17}, number = {1}, pages = {2442522}, doi = {10.1080/19490976.2024.2442522}, pmid = {39722539}, issn = {1949-0984}, mesh = {Humans ; *Interleukin-8/metabolism/genetics ; *Colonic Neoplasms/microbiology/pathology ; *Fusobacterium/isolation & purification/genetics ; *Epithelial Cells/microbiology ; *Phylogeny ; Bacterial Adhesion ; Colon/microbiology/pathology ; Feces/microbiology ; Adenocarcinoma/microbiology/pathology ; Gastrointestinal Microbiome ; RNA, Ribosomal, 16S/genetics ; Genome, Bacterial ; }, abstract = {Cancerous tissue is a largely unexplored microbial niche that provides a unique environment for the colonization and growth of specific bacterial communities, and with it, the opportunity to identify novel bacterial species. Here, we report distinct features of a novel Fusobacterium species, F. sphaericum sp. nov. (Fs), isolated from primary colon adenocarcinoma tissue. We acquire the complete closed genome and associated methylome of this organism and phylogenetically confirm its classification into the Fusobacterium genus, with F. perfoetens as its closest neighbor. Fs is phenotypically and genetically distinct, with morphological analysis revealing its coccoid shape, that while similar to F. perfoetens is rare for most Fusobacterium members. 
Fs displays a metabolic profile and antibiotic resistance repertoire consistent with other Fusobacterium species. In vitro, Fs has adherent and immunomodulatory capabilities, as it intimately associates with human colon cancer epithelial cells and promotes IL-8 secretion. An analysis of the prevalence and abundance of Fs in > 20,000 human metagenomic samples shows that it is a rarely detected member within human stool with variable relative abundance, found in both healthy controls and patients with colorectal cancer (CRC). Our study sheds light on a novel bacterial species isolated directly from the human CRC tumor niche and given its in vitro interaction with cancer epithelial cells suggests that its role in human health and disease warrants further investigation.}, } @article {pmid39722458, year = {2024}, author = {He, W and Li, X and Qian, Q and Shang, L}, title = {The Developments and Prospects of Plant Super Pangenomes: Demands, Approaches and Applications.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101230}, doi = {10.1016/j.xplc.2024.101230}, pmid = {39722458}, issn = {2590-3462}, abstract = {By integrating genomes of different accessions, the pangenome can offer more comprehensive and reference-bias-free population genetic information in a species than a single reference genome. With the rapid accumulation of genomic sequencing data and the expanding scope of plant research, the focus of plant pangenomics research has gradually evolved from a single species to multiple species in recent years, giving rise to the concept of super pangenome that cover all genomic sequences of a genus-level taxonomic groups. By integrating more cultivated and wild species, the super pangenome has made significant contributions to the resolution of multiple research areas such as plant genetic diversity, evolution, domestication, and molecular breeding. 
Here, we provide a comprehensive overview of the plant super pangenomes, focusing on its unique value and development demands, construction approaches, potential applications and achievements. We highlight the distinctive advantages and promising prospects of super pangenomes and discuss the current challenges and future directions.}, } @article {pmid39719828, year = {2024}, author = {Zhang, L and Liu, Y and Huang, Y and Zhang, Y and Fu, Y and Xiao, Y and Chen, S and Zhang, K and Cheng, F}, title = {SolPGD: Solanaceae Pan-genomes Reveal Extensive Fractionation and Functional Innovation of Duplicated Genes.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101231}, doi = {10.1016/j.xplc.2024.101231}, pmid = {39719828}, issn = {2590-3462}, abstract = {The Solanaceae family contains many agriculturally important crops, including tomato, potato, pepper, and tobacco, as well as others with growing potential, such as the orphan crops groundcherry, wolfberry, and pepino. Research progress varies greatly among these species, with model crops like tomato far ahead, which limits the broader agricultural application of other solanaceous species. Here, we constructed the interspecies pan-genome for the Solanaceae family and identified distinct patterns of gene retention. We reveal that the activity of specific transposable elements is associated with gene fractionation and transposition. The pan-genome is further resolved at the level of T subgenomes that were generated by Solanaceae specific paleo-hexaploidization (T event). We show the strong fractionation (loss) and divergence of genes resulting from ancient duplications. For example, all the class A and E flower model genes in Solanaceae originally evolved from two tandemly duplicated genes, which further expanded through the γ and T events and then fractionated into ten genes in tomato, acquiring distinct functions critical for fruit development. 
Based on these results, we developed the Solanaceae Pan-Genome Database (SolPGD, http://www.bioinformaticslab.cn/SolPGD), which integrates various datasets of the inter- and intra-pangenomes of Solanaceae. These findings and valuable resources will further promote studies of solanaceous species, including the orphan crops.}, } @article {pmid39718018, year = {2024}, author = {Tian, R and Xie, F and Wang, X and Dai, L and Wang, J and Liu, Y and Zhao, C and Li, Q and Zhang, W}, title = {Epidemiological investigation and drug resistance analysis of Avian pathogenic Escherichia coli (APEC) of Wenchang chickens in Hainan, China.}, journal = {Avian pathology : journal of the W.V.P.A}, volume = {}, number = {}, pages = {1-41}, doi = {10.1080/03079457.2024.2447296}, pmid = {39718018}, issn = {1465-3338}, abstract = {Avian pathogenic Escherichia coli (APEC) is one of the major causes of poultry morbidity worldwide, severely reducing egg production and embryo hatchability in laying hens. Hainan Wenchang chicken is an important poultry breed in Hainan, China, and its culture has been affected by APEC for a long time. In this study, in order to investigate the causes of low hatchability and a large number of weak chicks during the breeding of Wenchang chicken, a total of 130 strains of APEC were isolated from 591 chicken embryo samples collected from five large-scale farms of Wenchang chicken in Hainan area. The APEC isolates from Hainan Wenchang chicken embryos were analyzed in terms of serotypes, drug resistance, genomes and evolutionary relationships. O8 was the main prevalent serotype of APEC from embryos, and the isolates were highly resistant to antibiotics and multi-drug resistant. 19.2% of the isolates were resistant to 14 antibiotics with a resistance rate of more than 73%. The number of resistance genes carried by APEC isolates from embryos was stabilized at 53. 
The results of pan-genomic analysis showed that the genomes of Hainan Wenchang chicken embryo APEC isolates would be concentrated in 2 evolutionary clusters, and the isolates of the same serotype had extremely close evolutionary relationships. This study is the first large-scale isolation and analysis of APEC isolated from Wenchang chicken embryos, which can provide a reference for the prevention and control of APEC and antibiotic use in Hainan Wenchang chickens.}, } @article {pmid39717611, year = {2024}, author = {Baati, H and Siala, M and Benali, S and Azri, C and Dunlap, C and Martínez-Espinosa, RM and Trigui, M}, title = {Elucidating metabolic pathways through genomic analysis in highly heavy metal-resistant Halobacterium salinarum strains.}, journal = {Heliyon}, volume = {10}, number = {23}, pages = {e40822}, pmid = {39717611}, issn = {2405-8440}, abstract = {The annotated and predicted genomes of five archaeal strains (AS1, AS2, AS8, AS11 and AS19), isolated from Sfax solar saltern sediments (Tunisia) and affiliated with Halobacterium salinarum, were performed by RAST webserver (Rapid Annotation using Subsystem Technology) and NCBI prokaryotic genome annotation pipeline (PGAP). The results showed the ability of strains to use a reduced semi-phosphorylative Entner-Doudoroff pathway for glucose degradation and an Embden-Meyerhof one for gluconeogenesis. They could use glucose, fructose, glycerol, and acetate as sole source of carbon and energy. ATP synthase, various cytochromes and aerobic respiration proteins were encoded. All strains showed fermentation capability through the arginine deiminase pathway and facultative anaerobic respiration using electron acceptors (Dimethyl sulfoxide and trimethylamine N-oxide). Several biosynthesis pathways for many amino acids were identified. Comparative and pangenome analyses between the strains and the well-studied halophilic archaea Halobacterium NRC-1 highlighted a notable dissimilarity. 
Besides, the strains shared a core genome of 1973 genes and an accessory genome of 767 genes. 129, 94, 67, 15 and 29 unique genes were detected in the AS1, AS2, AS8, AS11 and AS19 genomes, respectively. Most of these unique genes code for hypothetical proteins. The strains displayed plant-growth promoting characteristics under heavy metal stress (Ammonium assimilation, phosphate solubilization, chemotaxis, cell motility and production of indole acetic acid, siderophore and phenazine). Therefore, they could be used as a biofertilizer to promote plant growth. The genomes encoded numerous biotechnologically relevant genes responsible for vitamin biosynthesis, including cobalamin, folate, biotin, pantothenate, riboflavin, thiamine, menaquinone, nicotinate, and nicotinamide. The carotenogenetic pathway of the studied strains was also predicted. Consequently, the findings of this study contribute to a better understanding of the halophilic archaea metabolism providing valuable insights into their ecophysiology as well as relevant biotechnological applications.}, } @article {pmid39713061, year = {2024}, author = {Ning, W and Wang, W and Liu, Z and Xie, W and Chen, H and Hong, D and Yang, QY and Cheng, S and Guo, L}, title = {The pan-NLRome analysis based on 23 genomes reveals the diversity of NLRs in Brassica napus.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {44}, number = {12}, pages = {2}, pmid = {39713061}, issn = {1572-9788}, abstract = {Brassica napus, a globally significant oilseed crop, exhibits a wide distribution across diverse climatic zones. B. napus is being increasingly susceptible to distinct diseases, such as blackleg, clubroot and sclerotinia stem rot, leading to substantial reductions in yield. 
Nucleotide-binding site leucine-rich repeat genes (NLRs), the most pivotal family of resistance genes, can be effectively harnessed by identifying and uncovering their diversity to acquire premium disease-resistant gene resources. Here, we collected the genomes of 23 accessions and established the first comprehensive pan-NLRome in B. napus by leveraging multiple genomic resources. We observe significant variation in the number of NLR genes across different B. napus accessions, ranging from 189 to 474. Notably, TNL (TIR-NBS-LRR) genes constitute approximately half of the total count, indicating their predominant presence in B. napus. The number of NLRs in the C subgenome is significantly higher than that in the A subgenome, and chromosome C09 exhibits the highest density of NLR genes with featuring multiple NLR clusters. Domain analysis reveals that the integrated domains significantly enhance the diversity of NLRs, with B3 DNA binding, VQ, and zinc fingers being the most prevalent integrated domains. Pan-genomic analysis reveals that the core type of NLR genes, which is present in most accessions, constitutes approximately 58% of the total NLRs. Furthermore, we conduct a comparative analysis of the diversity of NLR genes across distinct ecotypes, leading to the identification of ecotype-specific NLRs and their integrated domains. In conclusion, our study effectively addresses the limitations of a single reference genome and provides valuable insights into the diversity of NLR genes in B. 
napus, thereby contributing to disease resistance breeding.}, } @article {pmid39708886, year = {2024}, author = {Bano, S and Khatoon, A and Quareshi, U and Ul-Haq, Z and Karim, A}, title = {Pan-genome analysis and drug repurposing strategies for extensively drug-resistant Salmonella Typhi: Subtractive genomics and e-pharmacophore approaches.}, journal = {International journal of biological macromolecules}, volume = {}, number = {}, pages = {139003}, doi = {10.1016/j.ijbiomac.2024.139003}, pmid = {39708886}, issn = {1879-0003}, abstract = {In the current study, we presented the genome sequence and taxonomic classification of the new extensively drug-resistant (XDR) Salmonella enterica serovar Typhi strain JRCGR-ST-AK02. Its genome size was found to be 4,780,534 bp, containing 4864 genes. Taxonomic classification was performed based on the Average Nucleotide Identity (ANI), Genome-to-Genome Distance Calculator (GGDC) and Average Amino Acid Identity (AAI) analysis. Pan-genome analysis revealed 34,4915 core genes, which are predominantly involved in general functions and carbohydrate metabolism. We used a subtractive genomics approach and identified the PocR protein as a drug target. Its 3D structure was built using homology modeling, and an e-pharmacophore hypothesis was created using its binding site. The pharmacophore hypothesis was screened against FDA-approved ligands library and a total of 2018 out 9392 drugs were selected for molecular docking. Cangrelor and Pentagastrin presented the highest docking scores (≥ -9.0). The binding dynamics of these promising FDA-approved drugs were further confirmed through 200 ns molecular dynamics simulation, highlighting their stable and strong interactions with the PocR protein. Our study highlights the potential of Cangrelor and Pentagastrin for repurposing against XDR Salmonella Typhi. 
By identifying these drugs as promising candidates, we pave the way for new treatments for XDR Salmonella Typhi infections.}, } @article {pmid39702388, year = {2024}, author = {Liu, H and Fan, Z and Tong, N and Lin, J and Huang, Y and Duan, Y and Zhu, X}, title = {The exploration of high production of tiancimycins in Streptomyces sp. CB03234-S revealed potential influences of universal stress proteins on secondary metabolisms of streptomycetes.}, journal = {Microbial cell factories}, volume = {23}, number = {1}, pages = {337}, pmid = {39702388}, issn = {1475-2859}, support = {CX20210112//the Hunan Provincial Innovation Foundation for Postgraduate/ ; 2021zzts0330//the Fundamental Research Funds for the Central Universities of Central South University (CSU)/ ; 2023SK2071//the Science and Technology Innovation Program of Hunan Province/ ; BP0820034//the Chinese Ministry of Education 111 Project/ ; BP0820034//the Chinese Ministry of Education 111 Project/ ; }, mesh = {*Streptomyces/metabolism/genetics ; *Secondary Metabolism ; *Bacterial Proteins/genetics/metabolism ; Genome, Bacterial ; }, abstract = {BACKGROUND: Universal stress proteins (USPs) are prevalent in various bacteria to cope with different adverse stresses, while their possible effects on secondary metabolisms of hosts are unclear. Tiancimycins (TNMs) are ten-membered endiynes possessing excellent potential for development of anticancer antibody-drug conjugates. During our efforts to improve TNMs titer, a high-producing strain Streptomyces sp. CB03234-S had been obtained and its possible high yield mechanism is being continuously explored to further enhance TNMs production.

RESULTS: In this work, the whole-genome resequencing and analysis results revealed a notable 583 kb terminal deletion containing 8 highly expressed usp genes in the genome of CB03234-S. The individual complementation of lost USPs in CB03234-S all showed differential effects on secondary metabolism, especially TNMs production. Among them, the overexpression of USP3 increased TNMs titer from 12.8 ± 0.2 to 31.1 ± 2.3 mg/L, while the overexpression of USP8 significantly reduced TNMs titer to only 1.0 ± 0.1 mg/L, but activated the production of porphyrin-type compounds. Subsequent genetic manipulations on USP3/USP8 orthologs in Streptomyces. coelicolor A3(2) and Streptomyces sp. CB00271 also presented clear effects on the secondary metabolisms of hosts. Further sequence similarity network analysis and Streptomyces-based pan‑genomic analysis suggested that the USP3/USP8 orthologs are widely distributed across Streptomyces.

CONCLUSION: Our studies shed light on the potential effects of USPs on secondary metabolisms of streptomycetes for the first time, and USPs could become novel targets for exploring and exploiting natural products in streptomycetes.}, } @article {pmid39699575, year = {2024}, author = {Xie, X and Deng, X and Chen, L and Yuan, J and Chen, H and Wei, C and Feng, C and Liu, X and Qiu, G}, title = {From Gene to Structure: Unraveling Genomic Dark Matter in Ca. Accumulibacter.}, journal = {Environmental science & technology}, volume = {}, number = {}, pages = {}, doi = {10.1021/acs.est.4c09948}, pmid = {39699575}, issn = {1520-5851}, abstract = {"Candidatus Accumulibacter" is a unique and pivotal genus of polyphosphate-accumulating organisms prevalent in wastewater treatment plants and plays mainstay roles in the global phosphorus cycle. However, the efforts to fully understand their genetic and metabolic characteristics are largely hindered by major limitations in existing sequence-based annotation methods. Here, we reported an integrated approach combining pangenome analysis, protein structure prediction and clustering, and meta-omic characterization, to uncover genetic and metabolic traits previously unexplored for Ca. Accumulibacter. The identification of a previously overlooked pyrophosphate-fructose 6-phosphate 1-phosphotransferase gene (pfp) suggested that all Ca. Accumulibacter encoded a complete Embden-Meyerhof-Parnas pathway. A homologue of the phosphate-specific transport system accessory protein (PhoU) was suggested to be an inorganic phosphate transport (Pit) accessory protein (Pap) conferring effective and efficient phosphate transport. Additional lineage members were found to encode complete denitrification pathways. A pipeline was built, generating a pan-Ca. Accumulibacter annotation reference database, covering >200,000 proteins and their encoding genes. Benchmarking on 27 Ca. 
Accumulibacter genomes showed major improvement in the average annotation coverage from 51% to 82%. This pipeline is readily applicable to diverse cultured and uncultured bacteria to establish high-coverage annotation reference databases, facilitating the exploration of genomic dark matter in the bacterial domain.}, } @article {pmid39696427, year = {2024}, author = {Sarashetti, P and Lipovac, J and Tomas, F and Šikić, M and Liu, J}, title = {Evaluating data requirements for high-quality haplotype-resolved genomes for creating robust pangenome references.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {312}, pmid = {39696427}, issn = {1474-760X}, support = {IP-2018-01-5886//Hrvatska Zaklada za Znanost/ ; KK.01.1.1.01.0009//European Regional Development Fund/ ; MOH-000588-01//National Precision Medicine Program/ ; }, mesh = {*Haplotypes ; Humans ; Genomics/methods ; Genome, Human ; }, abstract = {BACKGROUND: Long-read technologies from Pacific Biosciences (PacBio) and Oxford Nanopore Technologies (ONT) have transformed genomics research by providing diverse data types like HiFi, Duplex, and ultra-long ONT. Despite recent strides in achieving haplotype-phased gapless genome assemblies using long-read technologies, concerns persist regarding the representation of genetic diversity, prompting the development of pangenome references. However, pangenome studies face challenges related to data types, volumes, and cost considerations for each assembled genome, while striving to maintain sensitivity. The absence of comprehensive guidance on optimal data selection exacerbates these challenges.

RESULTS: Our study evaluates recommended data types and volumes required to establish a robust de novo genome assembly pipeline for population-level pangenome projects, extensively examining performance between ONT's Duplex and PacBio HiFi datasets in the context of achieving high-quality phased genomes with enhanced contiguity and completeness. The results show that achieving chromosome-level haplotype-resolved assembly requires 20 × high-quality long reads such as PacBio HiFi or ONT Duplex, combined with 15-20 × of ultra-long ONT per haplotype and 10 × of long-range data such as Omni-C or Hi-C. High-quality long reads from both platforms yield assemblies with comparable contiguity, with HiFi excelling in phasing accuracies, while Duplex generates more T2T contigs.

CONCLUSION: Our study provides insights into optimal data types and volumes for robust de novo genome assembly in population-level pangenome projects. Reassessing the recommended data types and volumes in this study and aligning them with practical economic limitations are vital to the pangenome research community, contributing to their efforts and pushing genomic studies with broader impacts.}, } @article {pmid39695301, year = {2024}, author = {Rey, E and Abrouk, M and Dufau, I and Rodde, N and Saber, N and Cizkova, J and Fiene, G and Stanschewski, C and Jarvis, DE and Jellen, EN and Maughan, PJ and von Baer, I and Troukhan, M and Kravchuk, M and Hribova, E and Cauet, S and Krattinger, SG and Tester, M}, title = {Genome assembly of a diversity panel of Chenopodium quinoa.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {1366}, pmid = {39695301}, issn = {2052-4463}, mesh = {*Chenopodium quinoa/genetics ; *Genome, Plant ; Genetic Variation ; }, abstract = {Quinoa (Chenopodium quinoa) is an important crop for the future challenges of food and nutrient security. Deep characterization of quinoa diversity is needed to support the agronomic improvement and adaptation of quinoa as its worldwide cultivation expands. In this study, we report the construction of chromosome-scale genome assemblies of eight quinoa accessions covering the range of phenotypic and genetic diversity of both lowland and highland quinoas. The assemblies were produced from a combination of PacBio HiFi reads and Bionano Saphyr optical maps, with total assembly sizes averaging 1.28 Gb with a mean N50 of 71.1 Mb. Between 43,733 and 48,564 gene models were predicted for the eight new quinoa genomes, and on average, 66% of each quinoa genome was classified as repetitive sequences. Alignment between the eight genome assemblies allowed the identification of structural rearrangements including inversions, translocations, and duplications. 
These eight novel quinoa genome assemblies provide a resource for association genetics, comparative genomics, and pan-genome analyses for the discovery of genetic components and variations underlying agriculturally important traits.}, } @article {pmid39694857, year = {2024}, author = {Milia, S and Leonard, A and Mapel, XM and Bernal Ulloa, SM and Drögemüller, C and Pausch, H}, title = {Taurine pangenome uncovers a segmental duplication upstream of KIT associated with depigmentation in white-headed cattle.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.279064.124}, pmid = {39694857}, issn = {1549-5469}, abstract = {Cattle have been selectively bred for coat color, spotting, and depigmentation patterns. The assumed autosomal dominant inherited genetic variants underlying the characteristic white head of Fleckvieh, Simmental, and Hereford cattle have not been identified yet, although the contribution of structural variation upstream the KIT gene has been proposed. Here, we construct a graph pangenome from 24 haplotype assemblies representing seven taurine cattle breeds to identify and characterize the white head-associated locus for the first time based on long-read sequencing data and pangenome analyses. We introduce a pangenome-wide association mapping approach which examines assembly path similarities within the graph to reveal an association between two most likely serial alleles of a complex structural variant 66 kb upstream KIT and facial depigmentation. The complex structural variant contains a variable number of tandemly duplicated 14.3 kb repeats, consisting of LTRs, LINEs, and other repetitive elements, leading to misleading alignments of short and long reads when using a linear reference. We align 250 short-read sequencing samples spanning 15 cattle breeds to the pangenome graph, further validating that the alleles of the structural variant segregate with head depigmentation. 
We estimate an increased count of repeats in Hereford relative to Simmental and other white-headed cattle breeds from the graph alignment coverage, suggesting a large under-assembly in the current Hereford-based cattle reference genome which had fewer copies. Our work shows that exploiting assembly path similarities within graph pangenomes can reveal trait-associated complex structural variants.}, } @article {pmid39694196, year = {2024}, author = {Panigrahi, M and Rajawat, D and Nayak, SS and Jain, K and Nayak, A and Rajput, AS and Sharma, A and Dutt, T}, title = {A Comprehensive Review on Genomic Insights and Advanced Technologies for Mastitis Prevention in Dairy Animals.}, journal = {Microbial pathogenesis}, volume = {}, number = {}, pages = {107233}, doi = {10.1016/j.micpath.2024.107233}, pmid = {39694196}, issn = {1096-1208}, abstract = {Mastitis, is a multi-etiological disease that significantly impacts milk production and reproductive efficiency. This is highly prevalent in dairy populations subjected to intensive selection for higher milk yield and where inbreeding is common. The issue is amplified by climate change and poor hygiene management, making disease control challenging. Key obstacles include antibiotic resistance, maximum residue levels, horizontal gene transfer, and limited success in breeding for resistance. Predictive genomics offers a promising solution for mastitis prevention by identifying genetic traits linked with susceptibility to mastitis. This review compiles the research and findings on genomics and its allied approaches such as pan-genomics, epigenetics, proteomics, and transcriptomics for diagnosing, understanding, and treating mastitis. In dairy production, artificial intelligence (AI), particularly deep learning (DL) techniques like convolutional neural networks (CNNs), has demonstrated significant potential to enhance milk production and improve farm profitability. 
It highlights the integration of advanced technologies like machine learning (ML), CRISPR, and pan-genomics to enhance our knowledge of mastitis epidemiology, pathogen evolution, and the development of more effective diagnostic, preventive, and therapeutic strategies for dairy herds. Genomic advancements provide critical insights into the complexities of mastitis, offering new avenues for understanding its dynamics. Integrating these findings with key predisposing factors can drive targeted prevention and more effective disease management.}, } @article {pmid39687382, year = {2024}, author = {Shayerul Abedin Shayer, KM and Shuvo, SR and Jabeen, I and Hossain, M and Islam, S}, title = {Draft genome sequence data of Serratia marcescens strain harboring blaNDM-7 from Dhaka, Bangladesh.}, journal = {Data in brief}, volume = {57}, number = {}, pages = {111133}, doi = {10.1016/j.dib.2024.111133}, pmid = {39687382}, issn = {2352-3409}, abstract = {Here, the draft genome sequence of a multi-drug resistant (MDR) Serratia marcescens strain BMD28, isolated from a clinical source from Dhaka, Bangladesh, has been reported. The sequence raw read files were generated using Illumina sequencing technology utilizing genomic DNA from the pure culture of this strain. The strain has a genome size of around 5.4 million base pairs, a GC content of 59.70 %, and 5,141 coding sequences. We conducted genomic studies using several bioinformatics tools focusing on resistance genes, virulence factors, toxin-antitoxin systems, and pangenome analysis. Strain BMD28 harbored the blaNDM-7 gene in an IncX3 plasmid. A phylogenomic study with S. marcescens strains isolated worldwide revealed that our strain is in the same clade as other strains reported in Bangladesh. The data can be used primarily to understand the genomic content, epidemiology, and evolution of S. marcescens in Bangladesh. 
The genome sequence data of BMD28 has been deposited in the NCBI database under BioSample accession number SAMN41260295.}, } @article {pmid39684382, year = {2024}, author = {Salgado-Morales, R and Barba-Xochipa, K and Martínez-Ocampo, F and Dantán-González, E and Hernández-Mendoza, A and Quiterio-Trenado, M and Rodríguez-Santiago, M and Rivera-Ramírez, A}, title = {Pangenome-Wide Association Study in the Chlamydiaceae Family Reveals Key Evolutionary Aspects of Their Relationship with Their Hosts.}, journal = {International journal of molecular sciences}, volume = {25}, number = {23}, pages = {}, doi = {10.3390/ijms252312671}, pmid = {39684382}, issn = {1422-0067}, mesh = {*Chlamydiaceae/genetics ; Animals ; *Genome, Bacterial ; *Genome-Wide Association Study ; Humans ; *Chlamydia/genetics/classification ; Phylogeny ; Host-Pathogen Interactions/genetics ; Evolution, Molecular ; Chlamydophila/genetics ; }, abstract = {The Chlamydiaceae are a family of obligate intracellular bacteria known for their unique biphasic developmental cycle. Chlamydial are associated with various host organisms, including humans, and have been proposed as emerging pathogens. Genomic studies have significantly enhanced our understanding of chlamydial biology, host adaptation, and evolutionary processes. In this study, we conducted a complete pangenome association analysis (pan-GWAS) using 101 genomes from the Chlamydiaceae family to identify differentially represented genes in Chlamydia and Chlamydophila, revealing their distinct evolutionary strategies for interacting with eukaryotic hosts. Our analysis identified 289 genes with differential abundance between the two clades: 129 showed a strong association with Chlamydia and 160 with Chlamydophila. Most genes in Chlamydia were related to the type III secretion system, while Chlamydophila genes corresponded to various functional categories, including translation, replication, transport, and metabolism. 
These findings suggest that Chlamydia has developed a high dependence on mammalian cells for replication, facilitated by a complex T3SS for intracellular manipulation. In contrast, the metabolic and functional diversity in Chlamydophila allows it to colonize a broad range of hosts, such as birds, reptiles, amphibians, and mammals, making it a less specialized clade.}, } @article {pmid39675005, year = {2024}, author = {Rubio, A and Garzón, A and Moreno-Rodríguez, A and Pérez-Pulido, AJ}, title = {Biological warfare between two bacterial viruses in a defense archipelago sheds light on the spread of CRISPR-Cas systems.}, journal = {Cell reports}, volume = {43}, number = {12}, pages = {115085}, doi = {10.1016/j.celrep.2024.115085}, pmid = {39675005}, issn = {2211-1247}, abstract = {CRISPR-Cas systems are adaptive immunity systems of bacteria and archaea that prevent infection by viruses and other external mobile genetic elements. It is currently known that these defense systems can be co-opted by the same viruses. We have found one of these viruses in the opportunistic pathogen Acinetobacter baumannii, and the same system has been also found in an integration hotspot of the bacterial genome that harbors other multiple defense systems. The CRISPR-Cas system appears to especially target another virus that could compete with the system itself for the same integration site. This virus is prevalent in strains of the species belonging to the so-called Global Clone 2, which causes the most frequent outbreaks worldwide. 
Knowledge of this viral warfare involving antiviral systems could be useful in the fight against infections caused by bacteria, and it would also shed light on how CRISPR-Cas systems expand in bacteria.}, } @article {pmid39673077, year = {2024}, author = {Alkemade, JA and Hohmann, P and Messmer, MM and Barraclough, TG}, title = {Comparative Genomics Reveals Sources of Genetic Variability in the Asexual Fungal Plant Pathogen Colletotrichum lupini.}, journal = {Molecular plant pathology}, volume = {25}, number = {12}, pages = {e70039}, pmid = {39673077}, issn = {1364-3703}, support = {//Bundesamt für Landwirtschaft/ ; //Calleva Research Centre for Evolution and Human Science/ ; 727230//Horizon 2020 Framework Programme/ ; 17.00090//Staatssekretariat für Bildung, Forschung und Innovation/ ; RYC2022-037997//MICIU/AEI/10.13039/501100011033 and FSE+/ ; }, mesh = {*Colletotrichum/genetics/pathogenicity ; *Genetic Variation ; *Genome, Fungal ; *Genomics ; *Phylogeny ; Plant Diseases/microbiology ; DNA Transposable Elements/genetics ; Reproduction, Asexual/genetics ; Genome Size ; }, abstract = {Fungal plant pathogens cause major crop losses worldwide, with many featuring compartmentalised genomes that include both core and accessory regions, which are believed to drive adaptation. The highly host-specific fungus Colletotrichum lupini greatly impacts lupin (Lupinus spp.) cultivation. This pathogen is part of clade 1 of the C. acutatum species complex and comprises four genetically uniform, presumably clonal, lineages (I-IV). Despite this, variation in virulence and morphology has been observed within these lineages. To investigate the potential sources of genetic variability in this asexual fungus, we compared the genomes of 16 C. lupini strains and 17 related Colletotrichum species. 
Phylogenomics confirmed the presence of four distinct lineages, but further examination based on genome size, gene content, transposable elements (TEs), and deletions revealed that lineage II could be split into two groups, II-A and II-B. TE content varied between lineages and correlated strongly with genome size variation, supporting a role for TEs in genome expansion in this species. Pangenome analysis revealed a highly variable accessory genome, including a minichromosome present in lineages II, III, and IV, but absent in lineage I. Accessory genes and effectors appeared to cluster in proximity to TEs. Presence/absence variation of putative effectors was lineage-specific, suggesting that these genes play a crucial role in determining host range. Notably, no effectors were found on the TE-rich minichromosome. Our findings shed light on the potential mechanisms generating genetic diversity in this asexual fungal pathogen that could aid future disease management.}, } @article {pmid39671861, year = {2024}, author = {Buzzanca, D and Giordano, M and Chiarini, E and Ferrocino, I and Cocolin, L and Zeppa, G and Alessandria, V}, title = {Delving into Roccaverano PDO cheese: A comprehensive examination of microbial diversity and flavour profiles compared to non-PDO cheeses.}, journal = {International journal of food microbiology}, volume = {429}, number = {}, pages = {111014}, doi = {10.1016/j.ijfoodmicro.2024.111014}, pmid = {39671861}, issn = {1879-3460}, abstract = {Roccaverano Protected Designation of Origin (PDO) is a fresh soft cheese produced in Roccaverano area (Italy). This study aimed to evaluate Roccaverano PDO microbiota, together with aromatic profile and sensory analysis to be compared with 15 non-PDO cheeses of the same type. Microbiota was evaluated through shotgun metagenomics sequencing, while GC-MS analysis was conducted to study volatile organic compounds (VOCs) presence and concentration. 
Sensory analyses were conducted through ONAF (Italian National Organization of Cheese Tasters) evaluation parameters followed by flash profile sensory analysis of selected cheeses. The results demonstrated Lactococcus lactis predominance in both non-PDO and PDO cheeses, while Streptococcus thermophilus was more abundant in non-PDO group. A higher abundance of Kluyveromyces lactis was observed in Roccaverano PDO, which exhibited greater fungal diversity compared to non-PDO cheeses. Metagenome-Assembled Genomes of 26 L. lactis and 19 Leuconostoc mesenteroides showed absence of significant differences in terms of average nucleotide identity and pangenomes partitions. The ONAF sensory evaluation demonstrated a higher average score of Roccaverano PDO group. Flash profile analysis demonstrated that lactic aroma/odour, acid, astringent, vegetal odour, exotic fruit and fermented aroma, hazelnut flavour and sweet were associated with high ONAF scores. The concentration of butanoic acid, 2-methyl-, ethyl ester and butanoic acid, 3-methyl- (sweat, acid, rancid related) were higher in PDO cheeses, while reads related to butanoate metabolism were less abundant compared to non-PDO samples. Several fungal species (included K. lactis) were associated with astringents, acid and chalky flavours. 
Roccaverano PDO demonstrates unique characteristics even maintaining a certain degree of variability between samples.}, } @article {pmid39670914, year = {2024}, author = {Yang, Y and Yan, X and Haley, BJ and Li, C and Nou, X}, title = {Genomic Comparison of Reoccurring, Emerging, and Persistent (REP) Shiga Toxin-Producing Escherichia coli O157:H7.}, journal = {Foodborne pathogens and disease}, volume = {}, number = {}, pages = {}, doi = {10.1089/fpd.2024.0144}, pmid = {39670914}, issn = {1556-7125}, abstract = {Escherichia coli O157:H7 strains associated with several recent (2017-2020) multi-state outbreaks linked to leafy green vegetables have been characterized as "reoccurring, emerging, and persistent" (REP). Our recent unpublished work demonstrated that the REP strains had significantly enhanced potential for biofilm formation. In this study, comparative genomic analyses were conducted for a better understanding of the mechanisms behind the enhanced biofilm formation, and thereby potentially increased environmental fitness, by the REP strains. Phylogenetically, the recent outbreak strains formed two distinct clusters represented by REPEXH01 and REPEXH02. Compared with EDL933 and other previous outbreak reference strains, the REP strains (clustering with REPEXH02) exhibiting strong biofilm formation were found to have acquired two genes encoding proteins of unknown functions (hypothetical proteins) and lost certain prophage-related genes. In addition, several single nucleotide polymorphisms in genes related to biofilm formation were identified.}, } @article {pmid39670058, year = {2024}, author = {Kantor, EJH and Robicheau, BM and Tolman, J and Archibald, JM and LaRoche, J}, title = {Metagenomics reveals the genetic diversity between sublineages of UCYN-A and their algal host plastids.}, journal = {ISME communications}, volume = {4}, number = {1}, pages = {ycae150}, pmid = {39670058}, issn = {2730-6151}, abstract = {UCYN-A (or Cand. 
Atelocyanobacterium thalassa) has been recognized as a globally distributed, early stage, nitrogen-fixing organelle (the "nitroplast") of cyanobacterial origin present in the haptophyte alga Braarudosphaera bigelowii. Although the nitroplast was recognized as UCYN-A2, not all sublineages of UCYN-A have been confirmed as nitroplasts, and full genomes are still lacking for several known sublineages. We investigated the differences between UCYN-A sublineages by sequencing and assembly of metagenomic sequences acquired from cultured biomass from NW Atlantic seawater, which yielded near-complete Metagenome Assembled Genomes (MAGs) corresponding to UCYN-A1, -A4, and the plastid of the UCYN-A4-associated B. bigelowii. Weekly time-series data paired with the recurrence of specific microbes in cultures used for metagenomics gave further insight into the microbial community associated with the algal/UCYN-A complex. The UCYN-A1 MAG was found to have 99% average nucleotide identity (ANI) to the Pacific-derived reference genome despite its Atlantic Ocean origin. Comparison of the UCYN-A4 MAG (the initial genome sequenced from this sublineage) to other genomes showed that UCYN-A4 is sufficiently genetically distinct from both UCYN-A1 and UCYN-A2 (ANI of ~83% and ~85%, respectively) to be considered its own sublineage, but more similar to UCYN-A2 than -A1, supporting its possible classification as a nitroplast. The B. bigelowii plastid sequence was compared with published plastid sequences (sharing 78% ANI with Chrysochromulina parva) adding to our understanding of genomic variation across Haptophyta organelles and emphasizing the need for further full genomic sequencing of B. 
bigelowii genotypes and their organelles.}, } @article {pmid39667962, year = {2024}, author = {Zeng, W and Luo, M and Du, P and Li, Z and Peng, Y and Wang, M and Zhao, W and Zhang, H and Li, Y and Luo, P and Wu, Y and Xu, J and Li, X and Lu, X and Kan, B}, title = {bla NDM-1 Carried by a Transferable Plasmid in a Salmonella Strain Isolated from Healthy Individuals.}, journal = {Biomedical and environmental sciences : BES}, volume = {37}, number = {11}, pages = {1252--1261}, doi = {10.3967/bes2024.104}, pmid = {39667962}, issn = {2214-0190}, mesh = {*beta-Lactamases/genetics ; *Plasmids/genetics ; Humans ; Anti-Bacterial Agents/pharmacology ; China ; Microbial Sensitivity Tests ; Salmonella typhimurium/genetics/drug effects/isolation & purification ; Salmonella/genetics/drug effects/isolation & purification ; Salmonella Infections/microbiology ; }, abstract = {OBJECTIVE: Our study aimed to conduct genomic characterization of Salmonella strains carrying the bla NDM-1 gene in the intestinal tract of healthy individuals. The objectives were to underscore the importance of genomic surveillance for drug resistance in both commensal and pathogenic bacteria among healthy populations, and to establish protocols for regulating drug resistance plasmids based on the completion of a comprehensive map of drug resistance plasmid genomes.

METHODS: We performed antimicrobial susceptibility testing and employed second- and third-generation sequencing techniques to analyze Salmonella strains harboring the bla NDM-1 gene, to surveil drug-resistant bacteria in the intestines of healthy subjects. Sequence comparison was conducted using both core- and pan-genome approaches. Concurrently, conjugation experiments were carried out to assess the efficiency of plasmid transfer.

RESULTS: We isolated a carbapenem-resistant Salmonella enterica serovar Typhimurium strain from a healthy food worker in China. This strain harbored an IncHI2/IncHI2A plasmid carrying bla NDM-1 along with multiple antibiotic resistance genes (ARGs). Our findings highlight the potential for asymptomatic carriers to facilitate the transmission of ARGs. Pan-genomic analysis revealed that bla NDM-1-positive plasmids could traverse bacterial species barriers, facilitating cross-host transmission.

CONCLUSION: This study marks the first detection of bla NDM-1 in Salmonella strains isolated from healthy individuals. We underscore the risk associated with the transmission of conjugative hybrid plasmids carrying bla NDM-1, which have the potential to be harbored and transmitted among healthy individuals. Enhanced surveillance of drug-resistant pathogens and plasmids in the intestinal microbiota of healthy individuals could provide insights into the risk of ARG transmission and pathways for population-wide dissemination via ARG transfer factors.}, } @article {pmid39665690, year = {2024}, author = {Bian, P and Li, J and Zhou, S and Wang, X and Gong, M and Guo, X and Cai, Y and Yang, Q and Fu, J and Li, R and Huang, S and Luo, F and Shah, AM and Lenstra, JA and Mwacharo, JM and Li, R and Ren, G and Wang, X and Li, C and Zheng, W and Jiang, Y and Wang, X}, title = {A graph-based goat pangenome reveals structural variations involved in domestication and adaptation.}, journal = {Molecular biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/molbev/msae251}, pmid = {39665690}, issn = {1537-1719}, abstract = {Pangenomes can facilitate a deeper understanding of genome complexity. Using de novo phased long-read assemblies of eight representative goat breeds, we constructed a graph-based pangenome of goats (Capra hircus) and discovered 113 Mb autosomal novel sequences. Combining this multi-assembly pangenome with low-coverage PacBio HiFi sequences, we constructed a long-read structural variations (SVs) database containing 59,325 SV deletions, 84,910 SV insertions and 24,954 other complex SV alleles. This resource allowed reliable graph-based genotyping from short reads of 79 wild and 1,148 worldwide domestic goats. Selection signal analysis of SV captured a novel immune-related domestication locus containing the galectin-9 gene and extra copies of the ruminant-specific galectin-9-like genes (LGALS9L), which have high tissue specificity. 
A segmental duplication in domestic goats generates three additional LGALS9L copies. Ancient goat genome sequences show a gradual increase in frequency of this duplication from the Neolithic to the present. Two other newly detected SVs also have higher selection signals than adjacent SNPs, a truncated-LINE1 deletion in EDAR2 associated with cashmere production and a VNTR-related insertion in PAPSS2 linked to high-altitude adaptation. In summary, the multi-assembly goat pangenome and long-read SV database facilitates detecting complex variations that are important in evolution and selection.}, } @article {pmid39664816, year = {2024}, author = {Öztürk, Ü and Mattavelli, M and Ribeca, P}, title = {GIN-TONIC: non-hierarchical full-text indexing for graph genomes.}, journal = {NAR genomics and bioinformatics}, volume = {6}, number = {4}, pages = {lqae159}, pmid = {39664816}, issn = {2631-9268}, abstract = {This paper presents a new data structure, GIN-TONIC (Graph INdexing Through Optimal Near Interval Compaction), designed to index arbitrary string-labelled directed graphs representing, for instance, pangenomes or transcriptomes. GIN-TONIC provides several capabilities not offered by other graph-indexing methods based on the FM-Index. It is non-hierarchical, handling a graph as a monolithic object; it indexes at nucleotide resolution all possible walks in the graph without the need to explicitly store them; it supports exact substring queries in polynomial time and space for all possible walk roots in the graph, even if there are exponentially many walks corresponding to such roots. Specific ad-hoc optimizations, such as precomputed caches, allow GIN-TONIC to achieve excellent performance for input graphs of various topologies and sizes. Robust scalability capabilities and a querying performance close to that of a linear FM-Index are demonstrated for two real-world applications on the scale of human pangenomes and transcriptomes. 
Source code and associated benchmarks are available on GitHub.}, } @article {pmid39662211, year = {2024}, author = {Ye, YQ and Zhang, XY and Gong, HN and Ye, MQ and Du, ZJ}, title = {Description of Hyphococcus formosus sp. nov. and Hyphococcus lacteus sp. nov., isolated from coastal sediment, and reclassification of Marinicaulis flavus as Hyphococcus luteus nom. nov. and Marinicaulis aureus as Hyphococcus aureus comb. nov.}, journal = {Systematic and applied microbiology}, volume = {48}, number = {1}, pages = {126575}, doi = {10.1016/j.syapm.2024.126575}, pmid = {39662211}, issn = {1618-0984}, abstract = {During a study on sediment bacterial diversity in coastal China, three bacterial strains, DH-69[T], EH-24, and ECK-19[T], were isolated from coastal sediments off Xiaoshi Island, Weihai. These strains were Gram-staining-negative, aerobic, and coccoid to rod-shaped with prosthecae and flagella. Comparison of the 16S rRNA gene showed that they shared the highest identity values with Hyphococcus flavus MCCC 1K03223[T] (96.2-97.6 %), followed by Marinicaulis flavus SY-3-19[T] (95.2-96.8 %) and Marinicaulis aureus HHTR114[T] (95.2-96.2 %). Genome comparisons using average nucleotide identity (ANI) and average amino acid identity (AAI) suggested that the three novel strains and the three related strains belonged to the same genus, with strains DH-69[T], EH-24, and ECK-19[T] identified as two distinct novel species. Pan-genome analysis revealed that 995 core genes were shared among 23 Hyphococcus genomes/MAGs. Secondary metabolites analysis identified a biosynthesis gene cluster for microsclerodermin, a potent antifungal peptide, in the novel strains. Moreover, these newly isolated strains were detected in various ecosystems, with a particular prevalence in marine environments, based on analysis of 500,048 amplicon datasets, underscoring their ecological preference. 
Based on polyphasic characterizations, strains DH-69[T] and EH-24 represent a novel species of the genus Hyphococcus, for which the name Hyphococcus formosus sp. nov. is proposed with the type strain DH-69[T] (= MCCC 1H00436[T] = KCTC 8010[T]). Strain ECK-19[T] represents another novel Hyphococcus species, for which the name Hyphococcus lacteus sp. nov. is proposed with the type strain ECK-19[T] (= MCCC 1H00435[T] = KCTC 8009[T]). Furthermore, Marinicaulis flavus and Marinicaulis aureus are proposed to be reclassified as Hyphococcus luteus nom. nov. and Hyphococcus aureus comb. nov., respectively, accompanied by an emended description of the genus Hyphococcus.}, } @article {pmid39661475, year = {2024}, author = {Wittouck, S and Eilers, T and van Noort, V and Lebeer, S}, title = {SCARAP: scalable cross-species comparative genomics of prokaryotes.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae735}, pmid = {39661475}, issn = {1367-4811}, abstract = {MOTIVATION: Much of prokaryotic comparative genomics currently relies on two critical computational tasks: pangenome inference and core genome inference. Pangenome inference involves clustering genes from a set of genomes into gene families, enabling genome-wide association studies and evolutionary history analysis. The core genome represents gene families present in nearly all genomes and is required to infer a high-quality phylogeny. For species-level datasets, fast pangenome inference tools have been developed. However, tools applicable to more diverse datasets are currently slow and scale poorly.

RESULTS: Here, we introduce SCARAP, a program containing three modules for comparative genomics analyses: a fast and scalable pangenome inference module, a direct core genome inference module and a module for subsampling representative genomes. When benchmarked against existing tools, the SCARAP pan module proved up to an order of magnitude faster with comparable accuracy. The core module was validated by comparing its result against a core genome extracted from a full pangenome. The sample module demonstrated the rapid sampling of genomes with decreasing novelty. Applied to a dataset of over 31,000 Lactobacillales genomes, SCARAP showcased its ability to derive a representative pangenome. Finally, we applied the novel concept of gene fixation frequency to this pangenome, showing that Lactobacillales genes that are prevalent but rarely fixate in species often encode bacteriophage functions.

The SCARAP toolkit is publicly available at https://github.com/swittouck/scarap.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid39653491, year = {2024}, author = {Armer, VJ and Kroll, E and Darino, M and Smith, DP and Urban, M and Hammond-Kosack, KE}, title = {Navigating the Fusarium species complex: Host-range plasticity and genome variations.}, journal = {Fungal biology}, volume = {128}, number = {8 Pt B}, pages = {2439--2459}, doi = {10.1016/j.funbio.2024.07.004}, pmid = {39653491}, issn = {1878-6146}, mesh = {*Fusarium/genetics/classification ; *Genome, Fungal ; *Host Specificity ; Animals ; Genetic Variation ; Humans ; Phylogeny ; Ecosystem ; Plants/microbiology ; Plant Diseases/microbiology ; }, abstract = {The Ascomycete genus Fusarium, first introduced by Link in 1809, currently consists of 431 species and 3558 unclassified isolates and hybrids (according to NCBI Taxonomy lists). Collectively, these fungi have diverse lifestyles and infection cycles exploiting a wide range of environments, hosts, ecological niches, and nutrient sources. Here, we carried out a pan-Fusarium species review to describe and explore the glamorous, and the less attractive niches, exploited by pathogenic and endophytic species. We survey species that infect plant, human, animal and/or invertebrate hosts, free-living non-pathogenic species dwelling in land, air or water-based natural ecosystems, through to those species that exploit human-modified environments or are cultivated in industrial production systems. Fully sequenced, assembled and annotated reference genomes are already available for 189 Fusarium species, many at chromosome scale. In addition, for some of the world's most important species extensive single species pangenomes or closely related formae speciales genome clusters are readily available. Previous comparative genomics studies have focussed on taxonomically restricted clusters of Fusarium species. 
We now investigate potential new relationships between these vastly contrasting Fusarium biologies, niches and environmental occupancies and the evolution of their respective genomes.}, } @article {pmid39652592, year = {2024}, author = {Ciccolella, S and Cozzi, D and Della Vedova, G and Kuria, SN and Bonizzoni, P and Denti, L}, title = {Differential quantification of alternative splicing events on spliced pangenome graphs.}, journal = {PLoS computational biology}, volume = {20}, number = {12}, pages = {e1012665}, doi = {10.1371/journal.pcbi.1012665}, pmid = {39652592}, issn = {1553-7358}, abstract = {Pangenomes are becoming a powerful framework to perform many bioinformatics analyses taking into account the genetic variability of a population, thus reducing the bias introduced by a single reference genome. With the wider diffusion of pangenomes, integrating genetic variability with transcriptome diversity is becoming a natural extension that demands specific methods for its exploration. In this work, we extend the notion of spliced pangenomes to that of annotated spliced pangenomes; this allows us to introduce a formal definition of Alternative Splicing (AS) events on a graph structure. To investigate the usage of graph pangenomes for the quantification of AS events across conditions, we developed pantas, the first pangenomic method for the detection and differential analysis of AS events from short RNA-Seq reads. 
A comparison with state-of-the-art linear reference-based approaches proves that pantas achieves competitive accuracy, making spliced pangenomes effective for conducting AS events quantification and opening future directions for the analysis of population-based transcriptomes.}, } @article {pmid39648684, year = {2024}, author = {MacNish, TR and Al-Mamun, HA and Bayer, PE and McPhan, C and Fernandez, CGT and Upadhyaya, SR and Liu, S and Batley, J and Parkin, IAP and Sharpe, AG and Edwards, D}, title = {Brassica Panache: A multi-species graph pangenome representing presence absence variation across forty-one Brassica genomes.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20535}, doi = {10.1002/tpg2.20535}, pmid = {39648684}, issn = {1940-3372}, support = {DP200100762//Australian Research Council/ ; DP210100296//Australian Research Council/ ; }, abstract = {Brassicas are an economically important crop species that provide a source of healthy oil and vegetables. With the rising population and the impact of climate change on agriculture, there is an increasing need to improve agronomically important traits of crops such as Brassica. The genomes of plant species have significant sequence presence absence variation (PAV), which is a source of genetic variation that can be used for crop improvement, and this species variation can be captured through the construction of pangenomes. Graph pangenomes are a recent reference format that represent the genomic variation with a species or population as alternate paths in a sequence graph. Graph pangenomes contain information on alignment, PAV, and annotation. 
Here we present the first multi-species graph pangenome for Brassica visualized with pangenome analyzer with chromosomal exploration (Panache).}, } @article {pmid39644982, year = {2024}, author = {Mertz, P and Hentgen, V and Boursier, G and Delon, J and Georgin-Lavialle, S}, title = {Current landscape of monogenic autoinflammatory actinopathies: A literature review.}, journal = {Autoimmunity reviews}, volume = {24}, number = {2}, pages = {103715}, doi = {10.1016/j.autrev.2024.103715}, pmid = {39644982}, issn = {1873-0183}, abstract = {Autoinflammatory diseases (AID) are conditions leading to a hyperactivation of innate immunity without any underlying infection, and may be poly- (e.g. Still's disease) or monogenic. The number of monogenic AID is continuously expanding, with the discovery of novel pathologies and pathophysiological mechanisms, facilitated in part by easier access to pangenomic sequencing. Actinopathies with autoinflammatory manifestations represent a newly emerging subgroup of AID, associated with defects in the regulation of actin cytoskeleton dynamics. These diseases typically manifest in the neonatal period and variably combine a primary immunodeficiency of varying severity, cytopenia (particularly thrombocytopenia), autoinflammatory manifestations primarily affecting the skin and digestive system, as well as atopic and autoimmune features. Diagnosis should be considered primarily when encountering an early-onset autoinflammatory skin and digestive disorder, along with a primary immunodeficiency and either thrombocytopenia or a bleeding tendency. Some of these diseases exhibit specific features, such as a risk of macrophage activation syndrome (MAS) or a predisposition to atopy or lymphoproliferation. The complete pathophysiology of these diseases is not yet fully understood, and further studies are required to elucidate the underlying mechanisms, which could guide therapeutic choices. 
In most cases, the severity of the conditions necessitates allogeneic marrow transplantation as a treatment option. In this review, we discuss these novel diseases, providing a practical approach based on the main associated biological abnormalities and specific clinical characteristics, with a special focus on the newly described actinopathies DOCK11 and ARPC5 deficiency. Nonetheless, genetic testing remains essential for definitive diagnosis, and various differential diagnoses must be considered.}, } @article {pmid39641568, year = {2024}, author = {Lian, S and Liu, Y and Hu, S and Shen, C and Ma, Y and Yin, P and He, Z}, title = {Genomic insights on cgMLST markers, drug resistance, and urease cluster of Proteus mirabilis strains.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0099224}, doi = {10.1128/spectrum.00992-24}, pmid = {39641568}, issn = {2165-0497}, abstract = {UNLABELLED: Proteus mirabilis, a significant pathogenic bacterium within the Enterobacteriaceae family, is widely distributed across various natural environments. This study conducted a genomic comparison analysis of 1,267 strains of P. mirabilis using extensive genome data from public databases. The objective was to elucidate the pan-genomic structure of P. mirabilis, revealing the composition and distribution of core and accessory gene families among different strains. Additionally, an attempt was made to construct a core genome multilocus sequence typing scheme specific to this species in order to enhance the precision of describing genetic diversity and evolutionary relationships. Furthermore, the study delved into the mechanisms of resistance of P. mirabilis to carbapenems and quinolones. Our findings underscore significant challenges posed by P. mirabilis in terms of antibiotic resistance, with widespread resistance observed particularly against beta-lactams and an increasing trend in resistance to carbapenems and quinolones. 
These results highlight the severity of P. mirabilis as a pathogen and underscore its rapid evolution and adaptability in developing resistance. This study aims to deepen our understanding of the antibiotic resistance of P. mirabilis, providing important insights for the development of future antimicrobial drugs, promoting effective treatment and control of this pathogen, and mitigating its threat to human health.

IMPORTANCE: The bacterium Proteus mirabilis is a common pathogenic bacterium that is known to cause a variety of human infections. The drug-resistant genes carried by P. mirabilis present a significant challenge to clinical treatment, particularly in regard to the organism's notable resistance to commonly used beta-lactam and quinolone drugs. Furthermore, the prevalence of the urease gene cluster of P. mirabilis at the urease gene level may be associated with the formation of kidney stones. The objective of the study is to analyze the bacterium's drug resistance, urease gene clusters, and gene distribution in genomes in order to facilitate the development of antimicrobial drugs and improve the treatment and control of P. mirabilis infections.}, } @article {pmid39640918, year = {2024}, author = {Baede, VO and Jlassi, O and Lesiczka, PM and Younsi, H and Jansen, HJ and Dachraoui, K and Segobola, J and Ben Said, M and Veneman, WJ and Dirks, RP and Sprong, H and Zhioua, E}, title = {Similarities between Ixodes ricinus and Ixodes inopinatus genomes and horizontal gene transfer from their endosymbionts.}, journal = {Current research in parasitology \& vector-borne diseases}, volume = {6}, number = {}, pages = {100229}, pmid = {39640918}, issn = {2667-114X}, abstract = {The taxa Ixodes ricinus and Ixodes inopinatus are sympatric in Tunisia. The genetics underlying their morphological differences are unresolved. In this study, ticks collected in Jouza-Amdoun, Tunisia, were morphologically identified and sequenced using Oxford Nanopore Technologies. Three complete genome assemblies of I. inopinatus and three of I. ricinus with BUSCO scores of ∼98% were generated, including the reconstruction of mitochondrial genomes and separation of both alleles of the TRPA1, TROSPA and calreticulin genes. Deep sequencing allowed the first descriptions of complete bacterial genomes for "Candidatus Midichloria mitochondrii", Rickettsia helvetica and R. 
monacensis from North Africa, and the discovery of extensive integration of parts of the Spiroplasma ixodetis and "Ca. M. mitochondrii" into the nuclear genome of these ticks. Phylogenetic analyses of the mitochondrial genome, the nuclear genes, and symbionts showed differentiation between Tunisian and Dutch ticks, but high genetic similarities between Tunisian I. ricinus and I. inopinatus. Subtraction of the genome assemblies identified the presence of some unique sequences, which could not be confirmed when screening a larger batch of I. ricinus and I. inopinatus ticks using PCR. Our findings yield compelling evidence that I. inopinatus is genetically highly similar, if not identical, to sympatric I. ricinus. Defined morphological differences might be caused by extrinsic factors such as micro-climatic conditions or bloodmeal composition. Our findings support the existence of different lineages of I. ricinus as well of its symbionts/pathogens from geographically dispersed locations.}, } @article {pmid39633035, year = {2024}, author = {Rocha, BMO and Sabino, YNV and de Almeida, TC and Palacio, FB and Rotta, IS and Dias, VC and da Silva, VL and Diniz, CG and Azevedo, VAC and Brenig, B and Soares, SC and Paiva, AD and Medeiros, JD and Machado, ABF}, title = {Unlocking Probiotic Potential: Genomic Insights into Weissella paramesenteroides UFTM 2.6.1.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {39633035}, issn = {1867-1314}, abstract = {Weissella, a genus of lactic acid bacteria, has diverse beneficial attributes including probiotic activity and biotechnological applications. Therefore, the investigation of the Weissella genus has garnered growing interest. In this study, we sequenced the complete genome of Weissella paramesenteroides UFTM 2.6.1 isolated from unpasteurized cow's milk from the Triângulo Mineiro region and performed probiogenomic analyses. Taxonomic characterization confirmed the identity of W. 
paramesenteroides. The genome comprises 1926 protein-coding genes, mainly related to cell metabolism, information storage and processing, and cellular processes and signaling. Ninety-nine unique genes associated with probiotic functions were identified in the genome of W. paramesenteroides UFTM 2.6.1, including genes involved in stress response, bacterial persistence in the gastrointestinal tract, and biosynthesis of vitamins. In silico analysis of bacteriocin-related genes identified Pediocin, and subsequent in vitro testing confirmed that W. paramesenteroides UFTM 2.6.1 exhibits antimicrobial activity against Listeria spp. Genomic characterization revealed the presence of the replicon pLCK4 and four prophage regions, one of which was intact. Moreover, no CRISPR-Cas array or associated Cas proteins were found, along with an absence of resistance and virulence genes, suggesting a safety aspect of the evaluated strain. Pan-genome analysis unveiled 204 exclusive genes in the genome of W. paramesenteroides UFTM 2.6.1, which includes metabolism and stress-associated genes. In general, the results indicate probiotic potential of W. paramesenteroides UFTM 2.6.1. 
Further studies are required to ensure the safety and beneficial effects of this bacterium in vivo, aiming for future applications in the food industry and animal and human medicine.}, } @article {pmid39630499, year = {2024}, author = {Matlock, W and Shaw, LP and Stoesser, N}, title = {Global genomic epidemiology of bla GES-5 carbapenemase-associated integrons.}, journal = {Microbial genomics}, volume = {10}, number = {12}, pages = {}, pmid = {39630499}, issn = {2057-5858}, mesh = {*Integrons/genetics ; *beta-Lactamases/genetics ; *Plasmids/genetics ; *Bacterial Proteins/genetics ; Humans ; *Pseudomonas aeruginosa/genetics/drug effects ; Anti-Bacterial Agents/pharmacology ; Pseudomonas Infections/microbiology/epidemiology ; Genome, Bacterial ; }, abstract = {Antimicrobial resistance (AMR) gene cassettes comprise an AMR gene flanked by short recombination sites (attI and attC or attC and attC). Integrons are genetic elements able to capture, excise and shuffle these cassettes, providing 'adaptation on demand', and can be found on both chromosomes and plasmids. Understanding the patterns of integron diversity may help to understand the epidemiology of AMR genes. As a case study, we examined the clinical resistance gene bla GES-5, an integron-associated class A carbapenemase first reported in Greece in 2004 and since observed worldwide, which to our knowledge has not been the subject of a previous global analysis. Using a dataset comprising all de-duplicated NCBI contigs containing bla GES-5 (n=104), we developed a pangenome graph-based workflow to characterize and cluster the diversity of bla GES-5-associated integrons. We demonstrate that bla GES-5-associated integrons on plasmids are different to those on chromosomes. Chromosomal integrons were almost all identified in Pseudomonas aeruginosa ST235, with a consistent gene cassette content and order. 
We observed instances where insertion sequence IS110 disrupted attC sites, which might immobilize the gene cassettes and explain the conserved integron structure despite the presence of intI1 integrase promoters, which would typically facilitate capture or excision and rearrangement. The plasmid-associated integrons were more diverse in their gene cassette content and order, which could be an indication of greater integrase activity and 'shuffling' of integrons on plasmids.}, } @article {pmid39628721, year = {2024}, author = {Anjum, A and Tabassum, J and Islam, S and Hassan, AKMI and Jabeen, I and Shuvo, SR}, title = {Deciphering the genomic character of the multidrug-resistant {Staphylococcus} aureus from {Dhaka}, {Bangladesh}.}, journal = {AIMS microbiology}, volume = {10}, number = {4}, pages = {833--858}, pmid = {39628721}, issn = {2471-1888}, abstract = {Staphylococcus aureus is one of the leading agents of nosocomial and community-acquired infections. In this study, we explored the genomic characterization of eight methicillin-resistant clinical isolates of S. aureus from Dhaka, Bangladesh. Notably, all strains were resistant to penicillin, cephalosporins, and monobactams, with partial susceptibility to meropenem and complete susceptibility to amikacin, vancomycin, and tigecycline antibiotics. The strains were found to have an average genome size of 2.73 Mbp and an average of 32.64% GC content. Multi-locus sequence typing analysis characterized the most predominant sequence type as ST361, which belongs to the clonal complex CC361. All isolates harbored the mecA gene, often linked to SCCmec_type IV variants. Multidrug resistance was attributed to efflux pumps NorA, NorC, SdrM, and LmrS alongside genes encoding beta-lactamase BlaZ and factors like ErmC and MepA. Additionally, virulence factors including adsA, sdrC, cap8D, harA, esaA, essC, isdB, geh, and lip were commonly identified. 
Furthermore, genes associated with heme uptake and clumping were present, highlighting their roles in S. aureus colonization and pathogenesis. Nine secondary metabolite biosynthetic gene clusters were found, of which six were common in all the strains. Numerous toxin-antitoxin systems were predicted, with ParE and ParB-like nuclease domains found to be the most prevalent toxin and antitoxin, respectively. Pan-genome analysis revealed 2007 core genes and 229 unique genes in the studied strains. Finally, the phylogenomic analysis showed that most Bangladeshi strains were grouped into two unique clades. This study provides a genomic and comparative insight into the multidrug resistance and pathogenicity of S. aureus strains, which will play a crucial role in the future antibiotic stewardship of Bangladesh.}, } @article {pmid39628536, year = {2024}, author = {Wang, J and Liang, X and Zhang, W and Khalil, A and Wu, Y and Liu, S and Tahir Ul Qamar, M and Wang, X and Guo, J}, title = {Comparative genomic profiling of {CBFs} pan-gene family in five yellowhorn cultivars and functional identification of {Xg11\_CBF11}.}, journal = {Frontiers in plant science}, volume = {15}, number = {}, pages = {1481358}, pmid = {39628536}, issn = {1664-462X}, abstract = {C-repeat binding factor (CBF) transcription factors can activate the expression of a series of cold regulation-related genes, thereby improving the cold resistance of plants. However, no detailed information is known about the biological functions of CBF proteins in yellowhorn (Xanthoceras sorbifolium). In this study, a total of 59 CBF gene family members were identified in five yellowhorn cultivars (WF18, Zhongshi 4, Jinguanxipei 2021, Zhong Guan NO.2, and XsoG11), revealing their intraspecific structural and functional diversity, with 8 core genes present in all cultivars. Phylogenetic and motif analyses highlighted conserved features and species-specific adaptations. 
Gene duplication events revealed that tandem duplicates are major factors involved in the expansion of this gene family in yellowhorn. Expression profiling under stress conditions demonstrated the involvement of these genes in stress responses. Of particular interest was Xg11_CBF11, which showed strong induction by low-temperature stress. Overexpression of Xg11_CBF11 in Arabidopsis thaliana was performed to validate its cold resistance function. The wild-type and T2 transgenic A. thaliana plants were subjected to low-temperature stress at 4°C for 0, 24, and 48 h, and physiological indexes related to antioxidant enzyme activity, photosynthesis, and cell membrane permeability were determined by comparative test. The results were as follows: the POD and SOD activities of transgenic lines were significantly higher than those of wild-type lines, indicating Xg11_CBF11 improved the adaptability of A. thaliana to low-temperature; The increase of relative conductivity and malondialdehyde, the decrease of chlorophyll content in transgenic lines were smaller than those of wild-type lines, indicating Xg11_CBF11 enhanced the resistance of A. thaliana to low-temperature stress. These results implied that Xg11_CBF11 has a positive regulatory effect on A. 
thaliana 's response to low-temperature stress.}, } @article {pmid39627700, year = {2024}, author = {Hoque, MN and Mannan, ABA and Hossian, A and Faisal, GM and Hossain, MA and Sultana, M}, title = {Arsenotrophic Achromobacter aegrifaciens strains isolated from arsenic contaminated tubewell water and soil sources shared similar genomic potentials.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {518}, pmid = {39627700}, issn = {1471-2180}, mesh = {*Achromobacter/genetics/isolation & purification/classification/metabolism ; *Soil Microbiology ; *Arsenic/metabolism ; *Phylogeny ; *Genome, Bacterial/genetics ; *Biodegradation, Environmental ; Whole Genome Sequencing ; Soil Pollutants/metabolism ; Bangladesh ; Water Pollutants, Chemical/metabolism ; Genomics ; }, abstract = {BACKGROUND: Arsenic (As), found in diverse ecosystems, poses major public health risks in various parts of the world. Arsenotrophic bacteria in contaminated environments help reduce toxicity by converting arsenite (AsIII) to less harmful arsenate (AsV). We assumed that Achromobacter aegrifaciens strains from As-contaminated tubewell water and soil would share similar genomic characteristics associated with arsenic detoxification and bioremediation. To investigate this, we employed both culture-dependent and culture-independent viz. whole genome sequencing (WGS) methods to thoroughly elucidate the phenotypic and genotypic features of two A. aegrifaciens strains isolated from As-contaminated tubewell water (BAW48) and soil (BAS32) samples collected in the Bogura district of Bangladesh.

RESULTS: Both BAW48 and BAS32 isolates demonstrated As(III) oxidation in the KMNO4 test, which was corroborated by molecular analysis confirming the presence of aioA and arsB genes in both strains. These strains were found to be phylogenetically related to many strains of Achromobacter spp., isolated from biological inorganic reactors, environmental soils, sediments and human clinical samples across diverse geographical regions. Moreover, both strains possessed distinct heavy metal resistance genes conferring resistance to Co, Zn, Cu, Cd, Hg, As, and Cr. Three As gene clusters such as As(III) oxidizing aioBA, As(III) reducing arsRCDAB and the MMA(III) oxidizing ars resistance gene (arsHCsO) cluster were predicted in both genomes of A. aegrifaciens. Further genomic analyses revealed similar profiles in both strains, with mobile genetic elements, antimicrobials and heavy metal resistance genes, virulence genes, and metabolic features. Pangenome and synteny analysis showed that the two genomes are evolutionary distinct from other strains, but closely related to one another.

CONCLUSION: The genomic data confirmed that A. aegrifaciens strains can oxidize As(III) and detoxify heavy metals like As, suggesting their potential for As detoxification and bioremediation. These findings align with our assumption and provide a basis for developing sustainable solutions for bioremediation efforts in As-contaminated environments.}, } @article {pmid39627497, year = {2024}, author = {Jones, DAB and Rybak, K and Hossain, M and Bertazzoni, S and Williams, A and Tan, KC and Phan, HTT and Hane, JK}, title = {Repeat-induced point mutations driving Parastagonospora nodorum genomic diversity are balanced by selection against non-synonymous mutations.}, journal = {Communications biology}, volume = {7}, number = {1}, pages = {1614}, pmid = {39627497}, issn = {2399-3642}, support = {CUR00023//Grains Research and Development Corporation (Grains Research & Development Corporation)/ ; }, mesh = {*Ascomycota/genetics ; *Point Mutation ; *Genome, Fungal ; Plant Diseases/microbiology ; Polymorphism, Single Nucleotide ; Triticum/microbiology/genetics ; Genetic Variation ; Fungal Proteins/genetics/metabolism ; Repetitive Sequences, Nucleic Acid/genetics ; Selection, Genetic ; }, abstract = {Parastagonospora nodorum is necrotrophic fungal pathogen of wheat with significant genomic resources. Population-level pangenome data for 173 isolates, of which 156 were from Western Australia (WA) and 17 were international, were examined for overall genomic diversity and effector gene content. A heterothallic core population occurred across all regions of WA, with asexually-reproducing clonal clusters in dryer northern regions. High potential for SNP diversity in the form of repeat-induced point mutation (RIP)-like transitions, was observed across the genome, suggesting widespread 'RIP-leakage' from transposon-rich repetitive sequences into non-repetitive regions. 
The strong potential for RIP-like mutations was balanced by negative selection against non-synonymous SNPs, that was observed within protein-coding regions. Protein isoform profiles of known effector loci (SnToxA, SnTox1, SnTox3, SnTox267, and SnTox5) indicated low-levels of non-synonymous and high-levels of silent RIP-like mutations. Effector predictions identified 186 candidate secreted predicted effector proteins (CSEPs), 69 of which had functional annotations and included confirmed effectors. Pangenome-based effector isoform profiles across WA were distinct from global isolates and were conserved relative to population structure, and may enable new approaches for monitoring crop disease pathotypes.}, } @article {pmid39626271, year = {2024}, author = {Parmigiani, L and Garrison, E and Stoye, J and Marschall, T and Doerr, D}, title = {Panacus: fast and exact pangenome growth and core size estimation.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae720}, pmid = {39626271}, issn = {1367-4811}, abstract = {MOTIVATION: Using a single linear reference genome poses a limitation to exploring the full genomic diversity of a species. The release of a draft human pangenome underscores the increasing relevance of pangenomics to overcome these limitations. Pangenomes are commonly represented as graphs, which can represent billions of base pairs of sequence. Presently, there is a lack of scalable software able to perform key tasks on pangenomes, such as quantifying universally shared sequence across genomes (the core genome) and measuring the extent of genomic variability as a function of sample size (pangenome growth).

RESULTS: We introduce Panacus (pangenome-abacus), a tool designed to rapidly perform these tasks and visualize the results in interactive plots. Panacus can process GFA files, the accepted standard for pangenome graphs, and is able to analyze a human pangenome graph with 110 million nodes in less than one hour.

Panacus is implemented in Rust and is published as Open Source software under the MIT license. The source code and documentation are available at https://github.com/marschall-lab/panacus. Panacus can be installed via Bioconda at https://bioconda.github.io/recipes/panacus/README.html.}, } @article {pmid39621536, year = {2024}, author = {Jayakodi, M and Shim, H and Mascher, M}, title = {What Are We Learning from Plant Pangenomes?}, journal = {Annual review of plant biology}, volume = {}, number = {}, pages = {}, doi = {10.1146/annurev-arplant-090823-015358}, pmid = {39621536}, issn = {1545-2123}, abstract = {A single reference genome does not fully capture species diversity. By contrast, a pangenome incorporates multiple genomes to capture the entire set of nonredundant genes in a given species, along with its genome diversity. New sequencing technologies enable researchers to produce multiple high-quality genome sequences and catalog diverse genetic variations with better precision. Pangenomic studies have detected structural variants in plant genomes, dissected the genetic architecture of agronomic traits, and helped unravel molecular underpinnings and evolutionary origins of plant phenotypes. The pangenome concept has further evolved into a so-called superpangenome that includes wild relatives within a genus or clade and shifted to graph-based reference systems. Nevertheless, building pangenomes and representing complex structural variants remain challenging in many crops. Standardized computing pipelines and common data structures are needed to compare and interpret pangenomes. 
The growing body of plant pangenomics data requires new algorithms, huge data storage capacity, and training to help researchers and breeders take advantage of newly discovered genes and genetic variants.}, } @article {pmid39618494, year = {2024}, author = {Negro, G and Semeraro, M and Cook, PR and Marenduzzo, D}, title = {A unified-field theory of genome organization and gene regulation.}, journal = {iScience}, volume = {27}, number = {12}, pages = {111218}, pmid = {39618494}, issn = {2589-0042}, abstract = {Our aim is to predict how often genic and non-genic promoters fire within a cell. We first review a parsimonious pan-genomic model for genome organization and gene regulation, where transcription rate is determined by proximity in 3D space of promoters to clusters containing appropriate factors and RNA polymerases. This model reconciles conflicting results indicating that regulatory mammalian networks are both simple (as over-expressing just 4 transcription factors switches cell state) and complex (as genome-wide association studies show phenotypes like cell type are determined by thousands of loci rarely encoding such factors). We then present 3D polymer simulations, and a proximity formula based on our biological model that enables prediction of transcriptional activities of all promoters in three human cell types. 
This simple fitting-free formula contains just one variable (distance on the genetic map to the nearest active promoter), and we suggest it can in principle be applied to any organism.}, } @article {pmid39615045, year = {2024}, author = {Valentino, V and De Filippis, F and Marotta, R and Pasolli, E and Ercolini, D}, title = {Genomic features and prevalence of Ruminococcus species in humans are associated with age, lifestyle, and disease.}, journal = {Cell reports}, volume = {43}, number = {12}, pages = {115018}, doi = {10.1016/j.celrep.2024.115018}, pmid = {39615045}, issn = {2211-1247}, abstract = {The genus Ruminococcus is dominant in the human gut, but higher levels of some species, such as R. gnavus, R. torques, and R. bromii, have been linked to health or disease. In this study, we analyzed >9,000 Ruminococcus metagenome-assembled genomes (MAGs) reconstructed from >5,000 subjects and revealed significant links between the prevalence of some species/subspecies and geographic origin, age, lifestyle, and disease, with subspecies prevalent in specific subpopulations showing divergent metabolic potential. Furthermore, Ruminococcus species from Lachnospiraceae encoded for carbohydrate-active enzymes (CAZy) potentially involved in the metabolism of human N- and O-glycans, whereas those from Oscillospiraceae appear to be more adapted toward fiber metabolism. 
These new findings contribute to elucidating the potential functional role of Ruminococcus in specific lifestyles and diseases and to decipher the diversity and the adaptation of members of this genus to the human gut.}, } @article {pmid39613891, year = {2024}, author = {Foysal, MJ and Momtaz, F and Chowdhury, AMMA and Tanni, AA and Salauddin, A and Hasan, MZ and Mina, SA and Sultana, N and Biswas, SK and Islam, K and Tay, A and Mannan, A}, title = {Whole-Genome Analysis of Multidrug-Resistant Klebsiella pneumoniae Kp04 Reveals Distinctive Antimicrobial and Arsenic-Resistance Genomic Features: A Case Study from Bangladesh.}, journal = {Current microbiology}, volume = {82}, number = {1}, pages = {22}, pmid = {39613891}, issn = {1432-0991}, support = {504-CU-RPC-2023-24/34/2024//Research and Publication Cell, University of Chittagong/ ; }, mesh = {*Klebsiella pneumoniae/genetics/drug effects/isolation & purification ; Bangladesh ; *Drug Resistance, Multiple, Bacterial/genetics ; *Anti-Bacterial Agents/pharmacology ; Humans ; *Genome, Bacterial ; *Whole Genome Sequencing ; *Klebsiella Infections/microbiology ; Microbial Sensitivity Tests ; Arsenic/pharmacology ; beta-Lactamases/genetics ; Genomics ; }, abstract = {Multidrug-resistant bacteria, particularly extended-spectrum-beta-lactamase-producing (ESBL) bacteria, pose a significant global public health challenge. Klebsiella pneumoniae (KPN) is frequently implicated in cases of this resistance. This study aimed to investigate the presence of drug and metal resistance genes in clinical K. pneumoniae isolate Kp04 and comparative genomics of clinical KPN isolates characterized from Bangladesh. A total of 12 isolates were collected. Disk-diffusion assay showed that all five isolates were resistant to 14 out of 21 tested antibiotics and sensitive to only three-tigecycline, imipenem, and meropenem. KPN Kp04 was positive for both blaSHV and blaCTX-M ESBL genes in PCR. 
All five isolates produced PCR amplicons of the correct size for ampicillin (ampC), tetracycline (tetC), fluoroquinolone (qnrS), and aminoglycoside (aadA) resistance genes. The whole genome of Kp04 was sequenced using the MiSeq Platform (V3 kit, 2 × 300 cycles). We utilized different databases to detect Antibiotic-Resistant Genes (ARGs), virulence factor genes (VFGs), and genomic functional features of the Kp04 strain. Whole-genome sequencing identified 75 ESBL, virulence, and multiple drug-resistant (MDR) genes including blaSHV, tetA, oqxA, oqxB, aadA, sul1-5, and mphA in KPN Kp04 isolate. Pan-genomic analysis of 43 Bangladeshi KPN isolates showed similarities between Dhaka and Chattogram isolates regarding virulence and antibiotic-resistant genes. Our results indicate the transmission of similar virulent KPN strains in Dhaka and Chattogram. This study would provide valuable information about drug sensitivity, antibiotic, and metal resistance features of K. pneumoniae circulated among hospitalized patients in Bangladeshi megacities.}, } @article {pmid39611087, year = {2024}, author = {Aziz, T and Naveed, M and Shabbir, MA and Sarwar, A and Naseeb, J and Zhao, L and Yang, Z and Cui, H and Lin, L and Albekairi, TH}, title = {Unveiling the whole genomic features and potential probiotic characteristics of novel Lactiplantibacillus plantarum HMX2.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1504625}, pmid = {39611087}, issn = {1664-302X}, abstract = {This study investigates the genomic features and probiotic potential of Lactiplantibacillus plantarum HMX2, isolated from Chinese Sauerkraut, using whole-genome sequencing (WGS) and bioinformatics for the first time. This study also aims to find genetic diversity, antibiotic resistance genes, and functional capabilities to help us better understand its food safety applications and potential as a probiotic. L. plantarum HMX2 was cultured, and DNA was extracted for WGS. 
Genomic analysis comprised average nucleotide identity (ANI) prediction, genome annotation, pangenome, and synteny analysis. Bioinformatics techniques were used to identify CoDing Sequences (CDSs), transfer RNA (tRNA) and ribosomal RNA (rRNA) genes, and antibiotic resistance genes, as well as to conduct phylogenetic analysis to establish genetic diversity and evolution. The study found a significant genetic similarity (99.17% ANI) between L. plantarum HMX2 and the reference strain. Genome annotation revealed 3,242 coding sequences, 65 tRNA genes, and 16 rRNA genes. Significant genetic variety was found, including 25 antibiotic resistance genes. A phylogenetic study placed L. plantarum HMX2 among closely related bacteria, emphasizing its potential for probiotic and food safety applications. The genomic investigation of L. plantarum showed essential genes, including plnJK and plnEF, which contribute to antibacterial action against foodborne pathogens. Furthermore, genes such as MurA, Alr, and MprF improve food safety and probiotic potential by promoting bacterial survival under stress conditions in food and the gastrointestinal tract. This study introduces the new genomic features of L. plantarum HMX2 about specific genetics and its possibility of relevant uses in food security and technologies. These findings of specific genes involved in antimicrobial activity provide fresh possibilities for exploiting this strain in forming probiotic preparations and food preservation methods. The future research should focus on the experimental validation of antibiotic resistance genes, comparative genomics to investigate functional diversity, and the development of novel antimicrobial therapies that take advantage of L. 
plantarum's capabilities.}, } @article {pmid39609363, year = {2024}, author = {Xu, S and Akhatayeva, Z and Liu, J and Feng, X and Yu, Y and Badaoui, B and Esmailizadeh, A and Kantanen, J and Amills, M and Lenstra, JA and Johansson, AM and Coltman, DW and Liu, GE and Curik, I and Orozco-terWengel, P and Paiva, SR and Zinovieva, NA and Zhang, L and Yang, J and Liu, Z and Wang, Y and Yu, Y and Li, M}, title = {Genetic advancements and future directions in ruminant livestock breeding: from reference genomes to multiomics innovations.}, journal = {Science China. Life sciences}, volume = {}, number = {}, pages = {}, pmid = {39609363}, issn = {1869-1889}, abstract = {Ruminant livestock provide a rich source of products, such as meat, milk, and wool, and play a critical role in global food security and nutrition. Over the past few decades, genomic studies of ruminant livestock have provided valuable insights into their domestication and the genetic basis of economically important traits, facilitating the breeding of elite varieties. In this review, we summarize the main advancements for domestic ruminants in reference genome assemblies, population genomics, and the identification of functional genes or variants for phenotypic traits. These traits include meat and carcass quality, reproduction, milk production, feed efficiency, wool and cashmere yield, horn development, tail type, coat color, environmental adaptation, and disease resistance. Functional genomic research is entering a new era with the advancements of graphical pangenomics and telomere-to-telomere (T2T) gap-free genome assembly. These advancements promise to improve our understanding of domestication and the molecular mechanisms underlying economically important traits in ruminant livestock. Finally, we provide new perspectives and future directions for genomic research on ruminant genomes. 
We suggest how ever-increasing multiomics datasets will facilitate future studies and molecular breeding in livestock, including the potential to uncover novel genetic mechanisms underlying phenotypic traits, to enable more accurate genomic prediction models, and to accelerate genetic improvement programs.}, } @article {pmid39607778, year = {2024}, author = {Li, H}, title = {{BWT} construction and search at the terabase scale.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae717}, pmid = {39607778}, issn = {1367-4811}, abstract = {MOTIVATION: Burrows-Wheeler Transform (BWT) is a common component in full-text indices. Initially developed for data compression, it is particularly powerful for encoding redundant sequences such as pangenome data. However, BWT construction is resource intensive and hard to be parallelized, and many methods for querying large full-text indices only report exact matches or their simple extensions. These limitations have hampered the biological applications of full-text indices.

RESULTS: We developed ropebwt3 for efficient BWT construction and query. Ropebwt3 indexed 320 assembled human genomes in 65 hours and indexed 7.3 terabases of commonly studied bacterial assemblies in 26 days. This was achieved using up to 170 gigabytes of memory at the peak without working disk space. Ropebwt3 can find maximal exact matches and inexact alignments under affine-gap penalties, and can retrieve similar local haplotypes matching a query sequence. It demonstrates the feasibility of full-text indexing at the terabase scale.

https://github.com/lh3/ropebwt3.}, } @article {pmid39606685, year = {2024}, author = {Mohabati, R and Rezaei, R and Mohajel, N and Ranjbar, MM and Samimi-Rad, K and Azadmanesh, K and Roohvand, F}, title = {Generation of Optimized Consensus Sequences for Hepatitis {C} virus ({HCV}) Envelope 2 Glycoprotein ({E2}) by a Modified Algorithm: Implication for a Pan-genomic {HCV} Vaccine.}, journal = {Avicenna journal of medical biotechnology}, volume = {16}, number = {4}, pages = {268--278}, pmid = {39606685}, issn = {2008-2835}, abstract = {BACKGROUND: Despite the success of "direct-acting antivirals" in treating Hepatitis C Virus (HCV) infection, invention of a preventive HCV vaccine is crucial for global elimination of the virus. Recent data indicated the importance of the induction of Pangenomic neutralizing Antibodies (PnAbs) against heterogenic HCV Envelope 2(E2), the cellular receptor binding antigen, by any HCV vaccine candidate. To overcome HCVE2 heterogeneity, "generation of consensus HCVE2 sequences" is proposed. However, Consensus Sequence (CS) generating algorithms such as "Threshold" and "Majority" have certain limitations including "Threshold-rigidity" which leads to induction of undefined residues and insensitivity of the "Majority" towards the "evolutionary cost of residual substitutions".

METHODS: Herein, first a modification to the "Majority" algorithm was introduced by incorporating BLOSUM matrices. Secondly, the HCVE2 sequences generated by the "Fitness" algorithm (using 1698 sequences from genotypes 1, 2, and 3) was compared with those generated by the "Majority" and "Threshold" algorithms using several in silico tools.

RESULTS: Results indicated that only "Fitness" provided completely defined, gapless HCVE2s for all genotypes/subtypes, while considered the evolutionary cost of amino acid replacements (main "Majority/Threshold" limitations) by substitution of several residues within the generated consensuses. Moreover, "Fitness-generated HCVE2 CSs" were superior for antigenic/immunogenic characteristics as an antigen, while their positions within the phylogenetic trees were still preserved.

CONCLUSION: "Fitness" algorithm is capable of generating superior/optimum HCVE2 CSs for inclusion in a pan-genomic HCV vaccine and can be similarly used in CS generation for other highly variable antigens from other heterogenic pathogens.}, } @article {pmid39606674, year = {2024}, author = {Zhang, C and Li, H and Yin, J and Han, Z and Liu, X and Chen, Y}, title = {Pan-genome wide identification and analysis of the SAMS gene family in sunflowers (Helianthus annuus L.) revealed their intraspecies diversity and potential roles in abiotic stress tolerance.}, journal = {Frontiers in plant science}, volume = {15}, number = {}, pages = {1499024}, pmid = {39606674}, issn = {1664-462X}, abstract = {INTRODUCTION: S-adenosylmethionine (SAM), a key molecule in plant biology, plays an essential role in stress response and growth regulation. Despite its importance, the SAM synthetase (SAMS) gene family in sunflowers (Helianthus annuus L.) remains poorly understood.

METHODS: In this study, the SAMS genes were identified from the sunflower genome. Subsequently, the protein properties, gene structure, chromosomal location, cis-acting elements, collinearity, and phylogeny of the SAMS gene family were analyzed by bioinformatic methods. Finally, the expression patterns of SAMS genes in different tissues, under different hormonal treatment and abiotic stress were analyzed based on transcriptome data and qRT-PCR.

RESULTS: This study identified 58 SAMS genes across nine cultivated sunflower species, which were phylogenetically classified into seven distinct subgroups. Physicochemical properties and gene structure analysis showed that the SAMS genes are tightly conserved between cultivars. Collinearity analysis revealed segmental duplications as the primary driver of gene family expansion. The codon usage bias analysis suggested that natural selection substantially shapes the codon usage patterns of sunflower SAMS genes, with a bias for G/C-ending high-frequency codons, particularly encoding glycine, leucine, and arginine. Analysis of the cis-regulatory elements in promoter regions, implied their potential roles in stress responsiveness. Differential expression patterns for HanSAMS genes were observed in different tissues as well as under hormone treatment or abiotic stress conditions by analyzing RNA-seq data from previous studies and qRT-PCR data in our current study. The majority of genes demonstrated a robust response to BRA and IAA treatments in leaf tissues, with no significant expression change observed in roots, suggesting the response of HanSAMS genes to hormones is tissue-specific. Expression analyses under abiotic stresses demonstrated diverse expression profiles of HanSAMS genes, with HanSAMS5 showing significant upregulation in response to both drought and salt stresses.

DISCUSSION: This comprehensive genomic and expression analysis provides valuable insights into the SAMS gene family in sunflowers, laying a robust foundation for future functional studies and applications in crop improvement for stress resilience.}, } @article {pmid39606487, year = {2024}, author = {Depuydt, L and Renders, L and de Vyver, SV and Veys, L and Gagie, T and Fostier, J}, title = {b-move: Faster Lossless Approximate Pattern Matching in a Run-Length Compressed Index.}, journal = {Research square}, volume = {}, number = {}, pages = {}, doi = {10.21203/rs.3.rs-5367343/v1}, pmid = {39606487}, issn = {2693-5015}, abstract = {Background: Due to the increasing availability of high-quality genome sequences, pan-genomes are gradually replacing single consensus reference genomes in many bioinformatics pipelines to better capture genetic diversity. Traditional bioinformatics tools using the FM-index face memory limitations with such large genome collections. Recent advancements in run-length compressed indices like Gagie et al.'s r-index and Nishimoto and Tabei's move structure, alleviate memory constraints but focus primarily on backward search for MEM-finding. Arakawa et al.'s br-index initiates complete approximate pattern matching using bidirectional search in run-length compressed space, but with significant computational overhead due to complex memory access patterns. Results: We introduce b-move, a novel bidirectional extension of the move structure, enabling fast, cache-efficient, lossless approximate pattern matching in run-length compressed space. It achieves bidirectional character extensions up to 7 times faster than the br-index, closing the performance gap with FM-index-based alternatives. For locating occurrences, b-move performs φ and φ inverse operations up to 7 times faster than the br-index. At the same time, it maintains the favorable memory characteristics of the br-index, for example, all available complete E. 
coli genomes on NCBI's RefSeq collection can be compiled into a b-move index that fits into the RAM of a typical laptop. Conclusions: b-move proves practical and scalable for pan-genome indexing and querying. We provide a C++ implementation of b-move, supporting efficient lossless approximate pattern matching including locate functionality, available at https://github.com/biointec/b-move under the AGPL-3.0 license.}, } @article {pmid39605465, year = {2024}, author = {Qin, C and Lypaczewski, P and Sayeed, MA and Cuénod, AC and Brinkley, L and Creasy-Marrazzo, A and Cato, ET and Islam, K and Ul Khabir, MI and Bhuiyan, MTR and Begum, Y and Qadri, F and Khan, AI and Nelson, EJ and Shapiro, BJ}, title = {Vibrio cholerae lineage and pangenome diversity varies geographically across {Bangladesh} over one year.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.11.12.623281}, pmid = {39605465}, issn = {2692-8205}, abstract = {Cholera is a diarrhoeal disease caused by Vibrio cholerae. It remains a major public health challenge in the endemic region around the Bay of Bengal. Over decadal time scales, one lineage typically dominates the others and spreads in global pandemic waves. However, it remains unclear to what extent diverse lineages co-circulate during a single outbreak season. Defining the pool of diversity during finer time scales is important because the selective pressures that impact V. cholerae - namely antibiotics and phages - are dynamic on these time scales. To study the nationwide diversity of V. cholerae, we long-read sequenced 273 V. cholerae genomes from seven hospitals over one year (2018) in Bangladesh. Four major V. cholerae lineages were identified: known lineages BD-1, BD-2a, and BD-2b, and a novel lineage that we call BD-3. In 2022, BD-1 caused a large cholera outbreak in Dhaka, apparently outcompeting BD-2 lineages. 
We show that, in 2018, BD-1 was already dominant in the five northern regions, including Dhaka, consistent with an origin from India in the north. By contrast, we observed a higher diversity of lineages in the two southern regions near the coast. The four lineages differed in pangenome content, including integrative and conjugative elements (ICEs) and genes involved in resistance to bacteriophages and antibiotics. Notably, BD-2a lacked an ICE and is predicted to be more sensitive to phages and antibiotics, but nevertheless persisted throughout the year-long sampling period. Genes associated with antibiotic resistance in V. cholerae from Bangladesh in 2006 were entirely absent from all lineages in 2018-19, suggesting shifting costs and benefits of encoding these genes. Together, our results highlight the dynamic nature of the V. cholerae pangenome and the geographic structure of its lineage diversity.}, } @article {pmid39605388, year = {2024}, author = {Versoza, CJ and Ehmke, EE and Jensen, JD and Pfeifer, SP}, title = {Characterizing the rates and patterns of de novo germline mutations in the aye-aye (Daubentonia madagascariensis).}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.11.08.622690}, pmid = {39605388}, issn = {2692-8205}, abstract = {Given the many levels of biological variation in mutation rates observed to date in primates - spanning from species to individuals to genomic regions - future steps in our understanding of mutation rate evolution will be aided by both a greater breadth of species coverage across the primate clade, but also by a greater depth as afforded by an evaluation of multiple trios within individual species. 
In order to help bridge these gaps, we here present an analysis of a species representing one of the most basal splits on the primate tree (aye-ayes), combining whole-genome sequencing of seven parent-offspring trios from a three-generation pedigree with a novel computational pipeline that takes advantage of recently developed pan-genome graphs, thereby circumventing the application of (highly subjective) quality metrics that has previously been shown to result in notable differences in the detection of de novo mutations, and ultimately estimates of mutation rates. This deep sampling has enabled both a detailed picture of parental age effects as well as sex dependency in mutation rates which we here compare with previously studied primates, but has also provided unique insights into the nature of genetic variation in one of the most endangered primates on the planet.}, } @article {pmid39605354, year = {2024}, author = {Eichelman, MC and Meyer, MM}, title = {Assessing the conservation and targets of putative sRNAs in Streptococcus pneumoniae.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.11.14.623631}, pmid = {39605354}, issn = {2692-8205}, abstract = {RNA regulators are often found in complex regulatory networks and may mediate metabolism and virulence in bacteria. Small RNAs (sRNAs), a class of non-coding RNAs that interact with an mRNA transcript via base pairing, modulate translation initiation and mRNA degradation. To better understand the role of sRNAs in pathogenicity, several studies identified sRNAs in Streptococcus pneumoniae; however, little functional characterization has followed. The goal of this study is threefold: 1) take an inventory of putative sRNAs in S. pneumoniae; 2) assess the conservation of these sRNAs; and 3) examine their predicted targets. Three previous studies in S.
pneumoniae identified 287 putative sRNAs by high-throughput sequencing using a variety of distinct inclusion criteria. This study narrows the candidates to a list of 59 putative sRNAs. BLAST analysis shows that each of the 59 sequences are highly conserved across the S. pneumoniae pangenome while only 5 sRNAs have corresponding sequences with substantial similarity in other members of the Streptococcus genus. We used four RNA-RNA interaction prediction programs (IntaRNA, CopraRNA, sRNARFTarget, and TargetRNA3) to predict targets for each of the 59 putative sRNAs. Across all probable predictions, only seven sRNAs have overlap in the targets predicted by multiple programs, four of which target numerous transposases. Moreover, sRNAs targeting transposases do so with nearly identical and perfect base pairing. One sRNA, named M63 (Spd_sr37), has several probable targets in the CcpA regulon, a network responsible for global catabolite repression, suggesting a possible biological function in control of carbon metabolism. Further, each M63-target interaction exhibits unique base pairing increasing confidence in the biological relevance of the result. This study produces a curated list of S. pneumoniae putative sRNAs whose predicted targets suggest functional significance in transposon and carbon metabolism regulation.}, } @article {pmid39604761, year = {2024}, author = {Chang, HY and Yen, HC and Chu, HA and Kuo, CH}, title = {Population genomics of a thermophilic cyanobacterium revealed divergence at subspecies level and possible adaptation genes.}, journal = {Botanical studies}, volume = {65}, number = {1}, pages = {35}, pmid = {39604761}, issn = {1817-406X}, abstract = {BACKGROUND: Cyanobacteria are diverse phototrophic microbes with ecological importance and potential for biotechnology applications. 
One species of thermophilic cyanobacteria, Thermosynechococcus taiwanensis, has been studied for biomass pyrolysis, estrogen degradation, and the production of bioethanol, monosaccharide, and phycocyanin. To better understand the diversity and evolution of this species, we sampled across different regions in Taiwan for strain isolation and genomic analysis.

RESULTS: A total of 27 novel strains were isolated from nine of the 12 hot springs sampled and subjected to whole genome sequencing. Including strains studied previously, our genomic analyses encompassed 32 strains from 11 hot springs. Genome sizes among these strains ranged from 2.64 to 2.70 Mb, with an average of 2.66 Mb. Annotation revealed between 2465 and 2576 protein-coding genes per genome, averaging 2537 genes. Core-genome phylogeny, gene flow estimates, and overall gene content divergence consistently supported the within-species divergence into two major populations. While isolation by distance partially explained the within-population divergence, the factors driving divergence between populations remain unclear. Nevertheless, this species likely has a closed pan-genome comprising approximately 3030 genes, with our sampling providing sufficient coverage of its genomic diversity. To investigate the divergence and potential adaptations, we identified genomic regions with significantly lower nucleotide diversity, indicating loci that may have undergone selective sweeps within each population. We identified 149 and 289 genes within these regions in populations A and B, respectively. Only 16 genes were common to both populations, suggesting that selective sweeps primarily targeted different genes in the two populations. Key genes related to functions such as photosynthesis, motility, and ion transport were highlighted.

CONCLUSIONS: This work provides a population genomics perspective on a hot spring cyanobacterial species in Taiwan. Beyond advancing our understanding of microbial genomics and evolution, the strains collected and genome sequences generated in this work provide valuable materials for future development and utilization of biological resources.}, } @article {pmid39604736, year = {2024}, author = {Jiao, C and Xie, X and Hao, C and Chen, L and Xie, Y and Garg, V and Zhao, L and Wang, Z and Zhang, Y and Li, T and Fu, J and Chitikineni, A and Hou, J and Liu, H and Dwivedi, G and Liu, X and Jia, J and Mao, L and Wang, X and Appels, R and Varshney, RK and Guo, W and Zhang, X}, title = {Pan-genome bridges wheat structural variations with habitat and breeding.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {39604736}, issn = {1476-4687}, abstract = {Wheat is the second largest food crop with a very good breeding system and pedigree record in China. Investigating the genomic footprints of wheat cultivars will unveil potential avenues for future breeding efforts[1,2]. Here we report chromosome-level genome assemblies of 17 wheat cultivars that chronicle the breeding history of China. Comparative genomic analysis uncovered a wealth of structural rearrangements, identifying 249,976 structural variations with 49.03% (122,567) longer than 5 kb. Cultivars developed in 1980s displayed significant accumulations of structural variations, a pattern linked to the extensive incorporation of European and American varieties into breeding programmes of that era. We further proved that structural variations in the centromere-proximal regions are associated with a reduction of crossover events. We showed that common wheat evolved from spring to winter types via mutations and duplications of the VRN-A1 gene as an adaptation strategy to a changing environment. 
We confirmed shifts in wheat cultivars linked to dietary preferences, migration and cultural integration in Northwest China. We identified large presence or absence variations of pSc200 tandem repeats on the 1RS terminal, suggesting its own rapid evolution in the wheat genome. The high-quality genome assemblies of 17 representatives developed and their good complementarity to the 10+ pan-genomes offer a robust platform for future genomics-assisted breeding in wheat.}, } @article {pmid39604663, year = {2024}, author = {Bortoluzzi, C and Mapel, XM and Neuenschwander, S and Janett, F and Pausch, H and Leonard, AS}, title = {Genome assembly of wisent (Bison bonasus) uncovers a deletion that likely inactivates the THRSP gene.}, journal = {Communications biology}, volume = {7}, number = {1}, pages = {1580}, pmid = {39604663}, issn = {2399-3642}, support = {204654//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung (Swiss National Science Foundation)/ ; }, mesh = {Animals ; *Bison/genetics ; *Genome ; Sequence Deletion ; Cattle/genetics ; }, abstract = {The wisent (Bison bonasus) is Europe's largest land mammal. We produced a HiFi read-based wisent assembly with a contig N50 value of 91 Mb containing 99.7% of the highly conserved single copy mammalian genes which improves contiguity a thousand-fold over an existing assembly. Extended runs of homozygosity in the wisent genome compromised the separation of the HiFi reads into parental-specific read sets, which resulted in inferior haplotype assemblies. A bovine super-pangenome built with assemblies from wisent, bison, gaur, yak, taurine and indicine cattle identified a 1580 bp deletion removing the protein-coding sequence of THRSP encoding thyroid hormone-responsive protein from the wisent and bison genomes. Analysis of 725 sequenced samples across the Bovinae subfamily showed that the deletion is fixed in both Bison species but absent in Bos and Bubalus. 
The THRSP transcript is abundant in adipose, fat, liver, muscle, and mammary gland tissue of Bos and Bubalus, but absent in bison. This indicates that the deletion likely inactivates THRSP in bison. We show that super-pangenomes can reveal potentially trait-associated variation across phylogenies, but also demonstrate that haplotype assemblies from species that went through population bottlenecks warrant scrutiny, as they may have accumulated long runs of homozygosity that complicate phasing.}, } @article {pmid39602797, year = {2024}, author = {Zhang, J and Nie, F and Luo, F and Wang, J}, title = {Phasing Nanopore genome assembly by integrating heterozygous variations and Hi-C data.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae712}, pmid = {39602797}, issn = {1367-4811}, abstract = {MOTIVATION: Haplotype-resolved genome assemblies serve as vital resources in various research domains, including genomics, medicine, and pangenomics. Algorithms employing Hi-C data to generate haplotype-resolved assemblies are particularly advantageous due to its ready availability. Existing methods primarily depend on mapping quality to filter out uninformative Hi-C alignments which may be susceptible to sequencing errors. Setting a high mapping quality threshold filters out numerous informative Hi-C alignments, whereas a low mapping quality threshold compromises the accuracy of Hi-C alignments. Maintaining high accuracy while retaining a maximum number of Hi-C alignments can be challenging.

RESULTS: In our experiments, heterozygous variations play an important role in filtering uninformative Hi-C alignments. Here, we introduce Diphase, a novel phasing tool that harnesses heterozygous variations to accurately identify the informative Hi-C alignments for phasing and to extend primary/alternate assemblies. Diphase leverages mapping quality and heterozygous variations to filter uninformative Hi-C alignments, thereby enhancing the accuracy of phasing and the detection of switches. To validate its performance, we conducted a comparative analysis of Diphase, FALCON-Phase, and GFAse on various human datasets. The results demonstrate that Diphase achieves a longer phased block N50 and exhibits higher phasing accuracy while maintaining a lower hamming error rate.

AVAILABILITY: The source code of Diphase is available at https://github.com/zhangjuncsu/Diphase.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid39601883, year = {2024}, author = {Mankoti, M and Pandit, NK and Meena, SS and Mohanty, A}, title = {Investigating the genomic and metabolic abilities of PGPR Pseudomonas fluorescens in promoting plant growth and fire blight management.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {110}, pmid = {39601883}, issn = {1617-4623}, mesh = {*Pseudomonas fluorescens/genetics/metabolism ; *Plant Diseases/microbiology ; *Molecular Docking Simulation ; *Rhizosphere ; *Plant Development ; *Erwinia amylovora/genetics/metabolism ; Genome, Bacterial ; Soil Microbiology ; Phenazines/metabolism ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Pseudomonas fluorescens is commonly found in diverse environments and is well known for its metabolic and antagonistic properties. Despite its remarkable attributes, its potential role in promoting plant growth remains unexplored. This study examines these traits across 14 strains residing in diverse rhizosphere environments through pangenome and comparative genome analysis, alongside molecular docking studies against Erwinia amylovora to combat fire blight. Whole genome analysis revealed circular chromosome (6.01-7.07 Mb) with GC content averaging 59.95-63.39%. Predicted genes included 16S rRNA and protein-coding genes ranging from 4435 to 6393 bp and 1527 to 1541 bp, respectively. Pangenome analysis unveiled an open pangenome, shedding light on genetic factors influencing plant growth promotion and biocontrol, including nitrogen fixation, phosphorus solubilization, siderophore production, stress tolerance, flagella biosynthesis, and induced systemic resistance. Furthermore, pyrrolnitrin, phenazine-1-carboxylic acid, pyoluteorin, lokisin, 2,4-diacetylphloroglucinol and pseudomonic acid were identified. Molecular docking against key proteins of E.
amylovora highlighted the high binding affinities of 2,4-diacetylphloroglucinol, pseudomonic acid, and lokisin. These findings underscore the multifaceted role of P. fluorescens in plant growth promotion and biocontrol, with key biomolecules showing promising applications in plant growth and defense against pathogens.}, } @article {pmid39600872, year = {2024}, author = {Song, JM and Long, HB and Ye, M and Yang, BR and Wu, GJ and He, HC and Wang, JL and Li, HW and Li, XG and Deng, DY and Li, B and Yuan, WL}, title = {Genomic characterization of a bla KPC-2-producing IncM2 plasmid harboring transposon ΔTn6296 in Klebsiella michiganensis.}, journal = {Frontiers in cellular and infection microbiology}, volume = {14}, number = {}, pages = {1492700}, pmid = {39600872}, issn = {2235-2988}, mesh = {*beta-Lactamases/genetics ; *Plasmids/genetics ; Humans ; *Whole Genome Sequencing ; China ; *Genome, Bacterial ; *Klebsiella Infections/microbiology ; *DNA Transposable Elements/genetics ; *Klebsiella/genetics/enzymology ; Drug Resistance, Multiple, Bacterial/genetics ; Anti-Bacterial Agents/pharmacology ; Microbial Sensitivity Tests ; Escherichia coli/genetics ; Bacterial Proteins/genetics/metabolism ; Conjugation, Genetic ; }, abstract = {Klebsiella michiganensis is an emerging hospital-acquired bacterial pathogen, particularly strains harboring plasmid-mediated carbapenemase genes. Here, we recovered and characterized a multidrug-resistant strain, bla KPC-2-producing Klebsiella michiganensis LS81, which was isolated from the abdominal drainage fluid of a clinical patient in China, and further characterized the co-harboring plasmid. K. michiganensis LS81 tested positive for the bla KPC-2 genes by PCR sequencing, with bla KPC-2 located on a plasmid as confirmed by S1 nuclease pulsed-field gel electrophoresis combined with Southern blotting. In the transconjugants, the bla KPC-2 genes were successfully transferred to the recipient strain E. coli EC600. 
Whole-genome sequencing and bioinformatics analysis confirmed that this strain belongs to sequence type 196 (ST196), with a complete genome comprising a 5,926,662bp circular chromosome and an 81,451bp IncM2 plasmid encoding bla KPC-2 (designated pLS81-KPC). The IncM2 plasmid carried multiple β-lactamase genes such as bla TEM-1B, bla CTX-M-3, and bla KPC-2 inserted in truncated Tn6296 with the distinctive core structure ISKpn27-bla KPC-2-ISKpn6. A comparison with 46 K. michiganensis genomes available in the NCBI database revealed that the closest phylogenetic relative of K. michiganensis LS81 is a clinical isolate from a wound swab in the United Kingdom. Ultimately, the pan-genomic analysis unveiled a substantial accessory genome within the strain, alongside significant genomic plasticity within the K. michiganensis species, emphasizing the necessity for continuous surveillance of this pathogen in clinical environments.}, } @article {pmid39597765, year = {2024}, author = {Diabankana, RGC and Zhamalbekova, AA and Shakirova, AE and Vasiuk, VI and Filimonova, MN and Validov, SZ and Safin, RI and Afordanyi, DM}, title = {Genomic Insights of Wheat Root-Associated Lysinibacillus fusiformis Reveal Its Related Functional Traits for Bioremediation of Soil Contaminated with Petroleum Products.}, journal = {Microorganisms}, volume = {12}, number = {11}, pages = {}, doi = {10.3390/microorganisms12112377}, pmid = {39597765}, issn = {2076-2607}, support = {075-15-2021-1395, 25.10.2021 (15.IP.21.0020)//Ministry of Science and Higher Education of the Russian Federation/ ; }, abstract = {The negative ecological impact of industrialization, which involves the use of petroleum products and dyes in the environment, has prompted research into effective, sustainable, and economically beneficial green technologies. For green remediation primarily based on active microbial metabolites, these microbes are typically from relevant sources. 
Active microbial metabolite production and genetic systems involved in xenobiotic degradation provide these microbes with the advantage of survival and proliferation in polluted ecological niches. In this study, we evaluated the ability of wheat root-associated L. fusiformis MGMM7 to degrade xenobiotic contaminants such as crude oil, phenol, and azo dyes. We sequenced the whole genome of MGMM7 and provided insights into the genomic structure of related strains isolated from contaminated sources. The results revealed that influenced by its isolation source, L. fusiformis MGMM7 demonstrated remediation and plant growth-promoting abilities in soil polluted with crude oil. Lysinibacillus fusiformis MGMM7 degraded up to 44.55 ± 5.47% crude oil and reduced its toxicity in contaminated soil experiments with garden cress (Lepidium sativum L.). Additionally, L. fusiformis MGMM7 demonstrated a significant ability to degrade Congo Red azo dye (200 mg/L), reducing its concentration by over 60% under both static and shaking cultivation conditions. However, the highest degradation efficiency was observed under shaking conditions. Genomic comparison among L. fusiformis strains revealed almost identical genomic profiles associated with xenobiotic assimilation. Genomic relatedness using Average Nucleotide Identity (ANI) and digital DNA-DNA hybridization (DDH) revealed that MGMM7 is distantly related to TZA38, Cu-15, and HJ.T1. Furthermore, subsystem distribution and pangenome analysis emphasized the distinctive features of MGMM7, including functional genes in its chromosome and plasmid, as well as the presence of unique genes involved in PAH assimilation, such as phnC/T/E, which is involved in phosphonate biodegradation, and nemA, which is involved in benzoate degradation and reductive degradation of N-ethylmaleimide. 
These findings highlight the potential properties of petroleum-degrading microorganisms isolated from non-contaminated rhizospheres and offer genomic insights into their functional diversity for xenobiotic remediation.}, } @article {pmid39596184, year = {2024}, author = {Wang, Z and Wu, Y and Liu, M and Chen, L and Xiao, K and Huang, Z and Zhao, Y and Wang, H and Ding, Y and Lin, X and Zeng, J and Peng, F and Zhang, J and Wang, J and Wu, Q}, title = {The Gene Cluster Cj0423-Cj0425 Negatively Regulates Biofilm Formation in Campylobacter jejuni.}, journal = {International journal of molecular sciences}, volume = {25}, number = {22}, pages = {}, doi = {10.3390/ijms252212116}, pmid = {39596184}, issn = {1422-0067}, support = {2023YFD1801000//National Key Research and Development Program of China/ ; 2020B0301030005//Guangdong Major Project of Basic and Applied Basic Research/ ; 2021TQ06N119//Talent Support Project of Guangdong/ ; 2022GDASZH-2022020402-1//Guangdong Academy of Sciences Project/ ; }, mesh = {*Biofilms/growth & development ; *Campylobacter jejuni/genetics/physiology/growth & development/metabolism ; *Multigene Family ; *Bacterial Proteins/genetics/metabolism ; *Gene Expression Regulation, Bacterial ; Quorum Sensing/genetics ; Flagella/genetics/metabolism ; Fatty Acids/metabolism ; }, abstract = {Campylobacter jejuni (C. jejuni) is a zoonotic foodborne pathogen that is widely distributed worldwide. Its optimal growth environment is microaerophilic conditions (5% O2, 10% CO2), but it can spread widely in the atmospheric environment. Biofilms are thought to play an important role in this process. However, there are currently relatively few research works on the regulatory mechanisms of C. jejuni biofilm formation. In this study, a pan-genome analysis, combined with the analysis of biofilm phenotypic information, revealed that the gene cluster Cj0423-Cj0425 is associated with the negative regulation of biofilm formation in C. jejuni. 
Through gene knockout experiments, it was observed that the Cj0423-Cj0425 mutant strain significantly increased biofilm formation and enhanced flagella formation. Furthermore, pull-down assay revealed that Cj0424 interacts with 93 proteins involved in pathways such as fatty acid synthesis and amino acid metabolism, and it also contains the quorum sensing-related gene luxS. This suggests that Cj0423-Cj0425 affects fatty acid synthesis and amino acid metabolism, influencing quorum sensing and strain motility, ultimately inhibiting biofilm formation.}, } @article {pmid39593788, year = {2024}, author = {Liu, H and Lin, J and Huang, Y and Duan, Y and Zhu, X}, title = {Genomic Comparisons Revealed the Key Genotypes of Streptomyces sp. CB03234-GS26 to Optimize Its Growth and Relevant Production of Tiancimycins.}, journal = {Bioengineering (Basel, Switzerland)}, volume = {11}, number = {11}, pages = {}, pmid = {39593788}, issn = {2306-5354}, support = {2023SK2071//the science and technology innovation Program of Hunan Province/ ; CX20210112//the Hunan Provincial Innovation Foundation for Postgraduate/ ; 2021zzts0330//the Fundamental Research Funds for the Central Universities of Central South University (CSU)/ ; 81872779//the National Natural Science Foundation of China Grants/ ; 82204256//the National Natural Science Foundation of China Grants/ ; BP0820034//the Chinese Ministry of Education 111 Project/ ; }, abstract = {Strain robustness and titer improvement are major challenges faced in the industrial development of natural products from Streptomyces. Tiancimycins (TNMs) produced by Streptomyces sp. CB03234 are promising anticancer payloads for antibody-drug conjugates, but further development is severely limited by the low titer of TNMs. Despite many efforts to generate various TNMs overproducers, the mechanisms underlying high TNMs production remain to be explored.
Herein, genome resequencing and genomic comparisons of different TNMs overproducers were conducted to explore the unique genotypes in CB03234-GS26. Four target genes were selected for further bioinformatic analyses and genetic validations. The results indicated that the inactivation of histidine ammonia-lyase (HAL) showed the most significant effect by blocking the intracellular degradation of histidine to facilitate relevant enzymatic catalysis and thus improve the production of TNMs. Additionally, the potassium/proton antiporter (P/PA) was crucial for intracellular pH homeostasis, and its deficiency severely impaired the alkaline tolerance of the cells. Subsequent pan-genomic analysis suggested that HAL and P/PA are core enzymes that are highly conserved in Streptomyces. Therefore, HAL and P/PA represented novel targets to regulate secondary metabolism and enhance strain robustness and could become potential synthetic biological modules to facilitate development of natural products and strain improvement in Streptomyces.}, } @article {pmid39591974, year = {2024}, author = {Peng, Y and Zhu, J and Wang, S and Liu, Y and Liu, X and DeLeon, O and Zhu, W and Xu, Z and Zhang, X and Zhao, S and Liang, S and Li, H and Ho, B and Ching, JY and Cheung, CP and Leung, TF and Tam, WH and Leung, TY and Chang, EB and Chan, FKL and Zhang, L and Ng, SC and Tun, HM}, title = {A metagenome-assembled genome inventory for children reveals early-life gut bacteriome and virome dynamics.}, journal = {Cell host \& microbe}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.chom.2024.10.017}, pmid = {39591974}, issn = {1934-6069}, abstract = {Existing microbiota databases are biased toward adult samples, hampering accurate profiling of the infant gut microbiome.
Here, we generated a metagenome-assembled genome inventory for children (MAGIC) from a large collection of bulk and viral-like particle-enriched metagenomes from 0 to 7 years of age, encompassing 3,299 prokaryotic and 139,624 viral species-level genomes, 8.5% and 63.9% of which are unique to MAGIC. MAGIC improves early-life microbiome profiling, with the greatest improvement in read mapping observed in Africans. We then identified 54 candidate keystone species, including several Bifidobacterium spp. and four phages, forming guilds that fluctuated in abundance with time. Their abundances were reduced in preterm infants and were associated with childhood allergies. By analyzing the B. longum pangenome, we found evidence of phage-mediated evolution and quorum sensing-related ecological adaptation. Together, the MAGIC database recovers genomes that enable characterization of the dynamics of early-life microbiomes, identification of candidate keystone species, and strain-level study of target species.}, } @article {pmid39589236, year = {2024}, author = {Granada, M and Cook, E and Sherlock, G and Rosenzweig, F}, title = {Microbe Profile: Candida glabrata - a master of deception.}, journal = {Microbiology (Reading, England)}, volume = {170}, number = {11}, pages = {}, doi = {10.1099/mic.0.001518}, pmid = {39589236}, issn = {1465-2080}, mesh = {*Candida glabrata/genetics/classification ; Humans ; *Phylogeny ; *Candidiasis/microbiology ; Genome, Fungal ; Antifungal Agents/pharmacology ; Drug Resistance, Fungal/genetics ; Animals ; }, abstract = {Candida glabrata is a fungal microbe associated with multiple vertebrate microbiomes and their terrestrial environments. In humans, the species has emerged as an opportunistic pathogen that now ranks as the second-leading cause of candidiasis in Europe and North America (Beardsley et al. Med Mycol 2024, 62). 
People at highest risk of infection include the elderly, immunocompromised individuals and/or long-term residents of hospital and assisted-living facilities. C. glabrata is intrinsically drug-resistant, metabolically versatile and able to avoid detection by the immune system. Analyses of its 12.3 Mb genome indicate a stable pangenome (Marcet-Houben et al. BMC Biol 2022, 20) and phylogenetic affinity with Saccharomyces cerevisiae. Recent phylogenetic analyses suggest reclassifying C. glabrata as Nakaseomyces glabratus (Takashima and Sugita. Med Mycol J 2022, 63: 119-132).}, } @article {pmid39587228, year = {2024}, author = {Balakrishnan, B and Johnson, S and Luckey, D and Marietta, E and Murray, J and Taneja, V}, title = {Small intestinal derived Prevotella histicola simulates biologic as a therapeutic agent.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {29217}, pmid = {39587228}, issn = {2045-2322}, support = {W81XWH-10-1-0257//Congressionally Directed Medical Research Programs/ ; }, mesh = {Animals ; *Prevotella/drug effects/genetics ; Mice ; *Arthritis, Rheumatoid/drug therapy/microbiology ; *Gastrointestinal Microbiome/drug effects ; Humans ; Arthritis, Experimental/drug therapy/microbiology ; Intestine, Small/microbiology/drug effects ; Tumor Necrosis Factor Inhibitors/pharmacology/therapeutic use ; HLA-DQ Antigens/genetics ; Disease Models, Animal ; Male ; }, abstract = {A role of gut microbiome in pathogenesis as well as response to treatment is documented in rheumatoid arthritis. Using a novel duodenal derived Prevotella histicola strain MCI 001, we have shown that it suppresses disease progression in a collagen-induced arthritis (CIA), a model for rheumatoid arthritis (RA) using humanized mice expressing HLA-DQ8 gene in the absence of endogenous class II genes. Here we compared efficacy of P. histicola MCI 001 with tumor necrosis factor inhibitor (TNFi) for treating arthritis. DQ8 arthritic mice treated with P.
histicola by oral gavage or TNFi, were compared for disease onset, incidence and severity. We demonstrate that oral treatment with P. histicola mimics treatment with TNFi in arthritic DQ8 mice. A pangenome comparison of our P. histicola MCI 001 with its closest available neighbors depicted it as a novel strain with unique gene sequences that may contribute to immune modulatory effects. Notably, it possesses a unique sequence of an outer membrane protein, BtuB, which is involved in vitamin B12 transport. Our data indicate that P. histicola MCI 001 is an attractive candidate to prevent the progression of disease in RA patients with ongoing disease.}, } @article {pmid39579164, year = {2024}, author = {Karmakar, M and Sur, S}, title = {Unlocking the Mycobacteroides abscessus pan-genome using computational tools: insights into evolutionary dynamics and lifestyle.}, journal = {Antonie van Leeuwenhoek}, volume = {118}, number = {1}, pages = {30}, pmid = {39579164}, issn = {1572-9699}, mesh = {*Genome, Bacterial ; *Mycobacterium abscessus/genetics/drug effects ; *Phylogeny ; Evolution, Molecular ; Computational Biology/methods ; Bacterial Proteins/genetics/metabolism ; Virulence/genetics ; Virulence Factors/genetics ; Genomics ; }, abstract = {Mycobacteroides abscessus is a non-tuberculous mycobacterium implicated in causing lung infections. It is difficult to control owing to resistance to antibiotics and disinfectants. This work was aimed at comprehending: the pan-genome architecture, evolutionary dynamics, and functionalities of pan-genome components linked to COGs and KEGG. Around 2802 core genes were present in each strain of the M. abscessus genome. The number of accessory genes ranged from 1615 to 2481. The open pan-genome of M. abscessus was attributed to the accessory genes underlining its adaptability in the host. Phylogenetic analysis revealed cluster-based relationships and highlighted factors shaping variability and adaptive capabilities. 
Transcription, metabolism, and pathogenic genes were vital for M. abscessus lifestyle. The accessory genes contributed to the diverse metabolic capability. The incidence of a significant portion of secondary metabolite biosynthesis genes provided insights for investigating their biosynthetic gene clusters. Additionally, a high proportion of xenobiotic biodegradation genes highlighted potential metabolic capabilities. In silico screening identified a potential vaccine candidate among hypothetical proteins in COGs. Functional analysis of M. abscessus pan-genome components unveiled factors associated with virulence, pathogenicity, infection establishment, persistence, and resistance. Notable amongst them were: MMPL family transporters, PE-PPE domain-containing proteins, TetR family transcriptional regulators, ABC transporters, Type-I, II, III, VII secretion proteins, DUF domain-containing proteins, cytochrome P450, VapC family toxin, virulence factor Mce family protein, type II toxin-antitoxin system. Overall, these results enhanced understanding of the metabolism, host-pathogen dynamics, pathogenic lifestyle, and adaptations. 
This will facilitate further investigations for combating infections and designing suitable therapies.}, } @article {pmid39577948, year = {2024}, author = {Sheng, H and Zhao, L and Suo, J and Yang, Q and Cao, C and Chen, J and Cui, G and Fan, Y and Ma, Y and Huo, S and Wu, X and Yang, T and Cui, X and Chen, S and Cui, S and Yang, B}, title = {Niche-specific evolution and gene exchange of Salmonella in retail pork and chicken.}, journal = {Food research international (Ottawa, Ont.)}, volume = {197}, number = {Pt 2}, pages = {115299}, doi = {10.1016/j.foodres.2024.115299}, pmid = {39577948}, issn = {1873-7145}, mesh = {Animals ; *Chickens/microbiology ; *Salmonella/genetics ; Swine ; *Food Microbiology ; China ; *Pork Meat/microbiology ; Anti-Bacterial Agents/pharmacology ; Gene Transfer, Horizontal ; Meat/microbiology ; Drug Resistance, Bacterial/genetics ; Genetic Variation ; Red Meat/microbiology ; }, abstract = {Salmonella exhibits extensive genetic diversity, facilitated by horizontal gene transfer occurring within and between species, playing a pivotal role in this diversification. Nevertheless, most studies focus on clinical and farm animal isolates, and research on the pangenome dynamics of Salmonella isolates from retail stage of the animal food supply chain is limited. Here, we investigated the genomes of 950 Salmonella isolates recovered from retail chicken and pork meats in seven provinces and one municipality of China in 2018. We observed a strong correlation between Salmonella sublineage diversity and the accessory genome with meat type, revealing reduced diversity associated with increased resistance. Importantly, genes associated with antibiotic, biocide, and heavy metal resistance were unevenly distributed in Salmonella from retail chicken and pork. 
Pork Salmonella isolates showed a higher prevalence of copper and silver resistance genes, while chicken Salmonella isolates displayed a significant predominance of genetic determinants associated with cephalosporin and ciprofloxacin resistance. Moreover, co-occurrence patterns of resistance determinants and their interaction with mobile genetic elements also correlated with meat type. In summary, our findings shed light on how Salmonella achieves their ecological niche success driven by evolution and gene changes in the retail stage of the animal food supply chain.}, } @article {pmid39576463, year = {2024}, author = {Frederico, TD and Cunha-Ferreira, IC and Vizzotto, CS and de Sousa, JF and Portugal, MM and Tótola, MR and Krüger, RH and Peixoto, J}, title = {Genomic and taxonomic characterization of the Comamonas sp. nov., a bacterium isolated from Brazilian Cerrado soil.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {}, number = {}, pages = {}, pmid = {39576463}, issn = {1678-4405}, support = {Conselho Nacional de Desenvolvimento Científico e Tecnológico//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; Coordenação de Aperfeiçoamento de Pessoal de Nível Superior//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; Fundação de Apoio à Pesquisa do Distrito Federal//Fundação de Apoio à Pesquisa do Distrito Federal/ ; }, abstract = {A novel strain identified as Comamonas sp. was isolated from the soil of the Brazilian savanna-like Cerrado biome, a global hotspot for biodiversity. Phylogenetic analysis based on 16S rRNA gene sequences showed that this strain is classified as Betaproteobacteria from the family Comamonadaceae. The digital DNA-DNA hybridization (dDDH) and Average Nucleotide Identity (ANI) results, of respectively 48.6% and < 93%, indicated that Comamonas sp. constitutes a new species with Comamonas testosteroni as its closest strain. Comamonas sp. 
is a Gram-negative, rod-shaped, and non-spore-forming bacterium. Its colonies typically exhibit a round, convex, and irregular shape with a clear color and spotted edges. It is characterized as non-fermenting, aerobic, and motile, presenting both oxidase and catalase activities. The optimal growth parameters for this bacterial strain are 30 °C, a pH range of 5-8, and 0% NaCl. In addition, its fatty acid profile included palmitic acid (C16:0) at 26.94%, 13-Methyltetradecanoic Acid (iso-C15:0) at 10.94%, myristic acid (C14:0) at 8.94%, and a summed feature comprising 16:1 ω7c, 16:1 ω6c, or 16:1 at 15.8%. Genomic analysis of Comamonas sp. revealed a GC content of 62.1% across its 5.6 Mb genome. Phylogenomic and pangenome analyses, along with in silico phenotypic characterization, indicate that this strain represents a novel species within the Comamonas genus, which we propose to name Comamonas brasiliensis sp. nov.}, } @article {pmid39576133, year = {2024}, author = {Kim, H and Ahn, J and Kim, J and Kang, H-S}, title = {Metagenomic insights and biosynthetic potential of Candidatus Entotheonella symbiont associated with Halichondria marine sponges.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0235524}, doi = {10.1128/spectrum.02355-24}, pmid = {39576133}, issn = {2165-0497}, abstract = {Korea, being surrounded by the sea, provides a rich habitat for marine sponges, which have been a prolific source of bioactive natural products. Although a diverse array of structurally novel natural products has been isolated from Korean marine sponges, their biosynthetic origins remain largely unknown. To explore the biosynthetic potential of Korean marine sponges, we conducted metagenomic analyses of sponges inhabiting the East Sea of Korea. This analysis revealed a symbiotic association of Candidatus Entotheonella bacteria with Halichondria sponges. Here, we report a new chemically rich Entotheonella variant, which we named Ca. Entotheonella halido. 
Remarkably, this symbiont makes up 69% of the microbial community in the sponge Halichondria dokdoensis. Genome-resolved metagenomics enabled us to obtain a high-quality Ca. E. halido genome, which represents the largest (12 Mb) and highest quality among previously reported Entotheonella genomes. We also identified the biosynthetic gene cluster (BGC) of the known sponge-derived Halicylindramides from the Ca. E. halido genome, enabling us to determine their biosynthetic origin. This new symbiotic association expands the host diversity and biosynthetic potential of metabolically talented bacterial genus Ca. Entotheonella symbionts. IMPORTANCE: Our study reports the discovery of a new bacterial symbiont Ca. Entotheonella halido associated with the Korean marine sponge Halichondria dokdoensis. Using genome-resolved metagenomics, we recovered a high-quality Ca. E. halido MAG (Metagenome-Assembled Genome), which represents the largest and most complete Ca. Entotheonella MAG reported to date. Pangenome and BGC network analyses revealed a remarkably high BGC diversity within the Ca. Entotheonella pangenome, with almost no overlapping BGCs between different MAGs. The cryptic and genetically unique BGCs present in the Ca. Entotheonella pangenome represent a promising source of new bioactive natural products.}, } @article {pmid39575183, year = {2024}, author = {Alnaimat, SM and Abushattal, S and Dmour, SM}, title = {Comparative genomic characterization of Cellulosimicrobium funkei isolate RVMD1 from Ma'an desert rock varnish challenges Cellulosimicrobium systematics.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1445943}, pmid = {39575183}, issn = {1664-302X}, abstract = {Desert environments harbor unique microbial communities. This study focuses on Cellulosimicrobium funkei isolate RVMD1, isolated from rock varnish in the Ma'an Desert. 
Initial identification was achieved using 16S rRNA gene sequencing, followed by whole-genome sequencing (WGS) for comprehensive characterization. The genome comprises 4,264,015 base pairs (857 contigs) with a high G + C content of 74.59%. A total of 4,449 proteins were predicted. Comparative analysis utilizing OrthoANI, ANI, AAI, and dDDH metrics suggests that RVMD1 belongs to the C. cellulans group, with the highest similarity to C. funkei (97.71% ANI). Phylogenomic analysis of 43 Cellulosimicrobium genomes revealed significant heterogeneity within the genus. Our results challenge current systematics, with C. cellulans potentially representing up to 9 distinct genomospecies. Isolate RVMD1 shows genetic adaptations to its desert environment, including genes for denitrification, oxygen and sulfur cycling, and diverse hydrogen metabolism. Pangenomic analysis uncovered a considerable number of unique genes within RVMD1, highlighting its genetic distinctiveness. Gene family expansions suggest evolution in response to stressors like UV radiation and nutrient limitation. This study represents the first whole-genome analysis of a bacterium isolated from Jordanian rock varnish, emphasizing the value of WGS in understanding microbial diversity and adaptation in extreme environments.}, } @article {pmid39574409, year = {2024}, author = {Sun, B and Pashkova, L and Pieters, PA and Harke, AS and Mohite, OS and Santos, A and Zielinski, DC and Palsson, BO and Phaneuf, PV}, title = {PanKB: An interactive microbial pangenome knowledgebase for research, biotechnological innovation, and knowledge mining.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae1042}, pmid = {39574409}, issn = {1362-4962}, support = {//Novo Nordisk Foundation/ ; NNF20CC0035580//Technical University of Denmark/ ; }, abstract = {The exponential growth of microbial genome data presents unprecedented opportunities for unlocking the potential of microorganisms. 
The burgeoning field of pangenomics offers a framework for extracting insights from this big biological data. Recent advances in microbial pangenomic research have generated substantial data and literature, yielding valuable knowledge across diverse microbial species. PanKB (pankb.org), a knowledgebase designed for microbial pangenomics research and biotechnological applications, was built to capitalize on this wealth of information. PanKB currently includes 51 pangenomes from 8 industrially relevant microbial families, comprising 8402 genomes, over 500 000 genes and over 7M mutations. To describe this data, PanKB implements four main components: (1) Interactive pangenomic analytics to facilitate exploration, intuition, and potential discoveries; (2) Alleleomic analytics, a pangenomic-scale analysis of variants, providing insights into intra-species sequence variation and potential mutations for applications; (3) A global search function enabling broad and deep investigations across pangenomes to power research and bioengineering workflows; (4) A bibliome of 833 open-access pangenomic papers and an interface with an LLM that can answer in-depth questions using its knowledge. 
PanKB empowers researchers and bioengineers to harness the potential of microbial pangenomics and serves as a valuable resource bridging the gap between pangenomic data and practical applications.}, } @article {pmid39565199, year = {2024}, author = {Mudge, JM and Carbonell-Sala, S and Diekhans, M and Martinez, JG and Hunt, T and Jungreis, I and Loveland, JE and Arnan, C and Barnes, I and Bennett, R and Berry, A and Bignell, A and Cerdán-Vélez, D and Cochran, K and Cortés, LT and Davidson, C and Donaldson, S and Dursun, C and Fatima, R and Hardy, M and Hebbar, P and Hollis, Z and James, BT and Jiang, Y and Johnson, R and Kaur, G and Kay, M and Mangan, RJ and Maquedano, M and Gómez, LM and Mathlouthi, N and Merritt, R and Ni, P and Palumbo, E and Perteghella, T and Pozo, F and Raj, S and Sisu, C and Steed, E and Sumathipala, D and Suner, MM and Uszczynska-Ratajczak, B and Wass, E and Yang, YT and Zhang, D and Finn, RD and Gerstein, M and Guigó, R and Hubbard, TJP and Kellis, M and Kundaje, A and Paten, B and Tress, ML and Birney, E and Martin, FJ and Frankish, A}, title = {GENCODE 2025: reference gene annotation for human and mouse.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae1078}, pmid = {39565199}, issn = {1362-4962}, support = {U24HG007234/HG/NHGRI NIH HHS/United States ; /NH/NIH HHS/United States ; WT222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; //European Molecular Biology Laboratory/ ; 2021/42/E/NZ2/00434//National Science Center/ ; }, abstract = {GENCODE produces comprehensive reference gene annotation for human and mouse. Entering its twentieth year, the project remains highly active as new technologies and methodologies allow us to catalog the genome at ever-increasing granularity. 
In particular, long-read transcriptome sequencing enables us to identify large numbers of missing transcripts and to substantially improve existing models, and our long non-coding RNA catalogs have undergone a dramatic expansion and reconfiguration as a result. Meanwhile, we are incorporating data from state-of-the-art proteomics and Ribo-seq experiments to fine-tune our annotation of translated sequences, while further insights into function can be gained from multi-genome alignments that grow richer as more species' genomes are sequenced. Such methodologies are combined into a fully integrated annotation workflow. However, the increasing complexity of our resources can present usability challenges, and we are resolving these with the creation of filtered genesets such as MANE Select and GENCODE Primary. The next challenge is to propagate annotations throughout multiple human and mouse genomes, as we enter the pangenome era. Our resources are freely available at our web portal www.gencodegenes.org, and via the Ensembl and UCSC genome browsers.}, } @article {pmid39565095, year = {2024}, author = {Bulka, O and Mahadevan, R and Edwards, EA}, title = {Pangenomic insights into Dehalobacter evolution and acquisition of functional genes for bioremediation.}, journal = {Microbial genomics}, volume = {10}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001324}, pmid = {39565095}, issn = {2057-5858}, mesh = {*Biodegradation, Environmental ; *Phylogeny ; *Genome, Bacterial ; Evolution, Molecular ; Bacterial Proteins/genetics/metabolism ; Genomics ; Gene Transfer, Horizontal ; Metagenome ; }, abstract = {Dehalobacter is a genus of organohalide-respiring bacteria that is recognized for its fastidious growth using reductive dehalogenases (RDases). In the SC05 culture, however, a Dehalobacter population also mineralizes dichloromethane (DCM) produced by chloroform dechlorination using the mec cassette, just downstream of its active RDase. 
A closed genome of this DCM-mineralizing lineage has previously evaded assembly. Here, we present the genomes of two novel Dehalobacter strains, each of which was assembled from the metagenome of a distinct subculture from SC05. A pangenomic analysis of the Dehalobacter genus, including RDase synteny and phylogenomics, reveals at least five species of Dehalobacter based on average nucleotide identity, RDase and core gene synteny, as well as differential functional genes. An integration hotspot is also pinpointed in the Dehalobacter genome, in which many recombinase islands have accumulated. This nested recombinase island encodes the active RDase and mec cassette in both SC05 Dehalobacter genomes, indicating the transfer of key functional genes between species of Dehalobacter. Horizontal gene transfer between these two novel Dehalobacter strains has implications for the evolutionary history within the SC05 subcultures and of the Dehalobacter genus as a whole, especially regarding adaptation to anthropogenic chemicals.}, } @article {pmid39565084, year = {2024}, author = {Yu, H and Wang, S and Wang, L and Wu, W and Xu, W and Wu, S and Li, X and Xu, W and Huang, Z and Lin, Y and Wang, H}, title = {Pan-genomic characterization and structural variant analysis reveal insights into spore development and species diversity in Ganoderma.}, journal = {Microbial genomics}, volume = {10}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001328}, pmid = {39565084}, issn = {2057-5858}, mesh = {*Ganoderma/genetics/classification ; *Spores, Fungal/genetics ; *Genome, Fungal ; Phylogeny ; Genomics ; Genetic Variation ; Fungal Proteins/genetics ; }, abstract = {Understanding the genomic diversity and functional implications of Ganoderma species is crucial for elucidating their evolutionary history and biotechnological potential. Here, we present the first pan-genomic analysis of Ganoderma spp., combining five newly sequenced genomes with ten publicly available genomes. 
Our comprehensive comparative study unveiled a rich genomic landscape, identifying core genes shared among all Ganoderma strains and species-specific gene sets. Additionally, we identified structural variants impacting the expression of key genes, including insights into the MSH4 gene involved in DNA repair and recombination processes, which exhibits a 440 bp insertion in the promoter region and a leucine-to-serine mutation in the gene body, potentially increasing spore production in the S3 strain. Overall, our study provides valuable insights into the genomic architecture and functional diversity of Ganoderma, paving the way for further research on its evolutionary dynamics, biotechnological applications and pharmaceutical potential.}, } @article {pmid39558187, year = {2024}, author = {Hamilton, JP and Li, C and Buell, CR}, title = {The rice genome annotation project: an updated database for mining the rice genome.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae1061}, pmid = {39558187}, issn = {1362-4962}, support = {//University of Georgia/ ; //Georgia Research Alliance/ ; //Georgia Seed Development/ ; }, abstract = {Rice (Oryza sativa L.) is a major cereal crop that provides calories across the world. With a small genome, rice has been used extensively as a model for genetic and genomic studies in the Poaceae. Since the release of the first rice genome sequence in 2002, an improved reference genome assembly, multiple whole genome assemblies, extensive gene expression profiles, and resequencing data from over 3000 rice accessions have been generated. To facilitate access to the rice genome for plant biologists, we updated the Rice Genome Annotation Project database (RGAP; https://rice.uga.edu) with new datasets including 16 whole genome rice assemblies and sequence variants generated from multiple rice pan-genome projects including the 3000 Rice Genomes Project. 
We updated gene expression abundance data with 80 RNA-sequencing datasets and to facilitate gene function discovery, performed gene coexpression resulting in 39 coexpression modules that capture highly connected sets of co-regulated genes. To facilitate comparative genome analyses, 32 335 syntelogs were identified between the Nipponbare reference genome and other rice genomes and 19 371 syntelogs were identified between Nipponbare and four other Poaceae genomes. Infrastructure improvements to the RGAP database include an upgraded genome browser and data access portals, enhanced website security and increased performance of the website.}, } @article {pmid39557856, year = {2024}, author = {Long, W and He, Q and Wang, Y and Wang, Y and Wang, J and Yuan, Z and Wang, M and Chen, W and Luo, L and Luo, L and Xu, W and Li, Y and Li, W and Yan, L and Cai, Y and Du, H and Xie, H}, title = {Genome evolution and diversity of wild and cultivated rice species.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {9994}, pmid = {39557856}, issn = {2041-1723}, support = {CARS-01-08//Earmarked Fund for China Agriculture Research System/ ; 31960400//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, mesh = {*Oryza/genetics ; *Genome, Plant ; *Evolution, Molecular ; *Genetic Variation ; *Crops, Agricultural/genetics ; Domestication ; Disease Resistance/genetics ; Phylogeny ; Plant Breeding ; Multigene Family/genetics ; Genes, Plant/genetics ; }, abstract = {Wild species of crops serve as a valuable germplasm resource for breeding of modern cultivars. Rice (Oryza sativa L.) is a vital global staple food. However, research on genome evolution and diversity of wild rice species remains limited. Here, we present nearly complete genomes of 13 representative wild rice species. 
By integrating with four previously published genomes for pangenome analysis, a total of 101,723 gene families are identified across the genus, including 9834 (9.67%) core gene families. Additionally, 63,881 gene families absent in cultivated rice species but present in wild rice species are discovered. Extensive structural rearrangements, sub-genomes exchanges, widespread allelic variations, and regulatory sequence variations are observed in wild rice species. Interestingly, expanded but less diverse disease resistance genes are found in the genomes of cultivated rice, likely due to the loss of some resistance genes and the fixing and amplification of genes encoding resistance genes to specific diseases during domestication and artificial selection. This study not only reveals natural variations valuable for gene-level studies and breeding selection but also enhances our understanding of rice evolution and domestication.}, } @article {pmid39555885, year = {2024}, author = {Benarroch, L}, title = {[The pangenome integrates the vast genetic diversity of the human population].}, journal = {Medecine sciences : M/S}, volume = {40 Hors série n° 1}, number = {}, pages = {75}, doi = {10.1051/medsci/2024126}, pmid = {39555885}, issn = {1958-5381}, } @article {pmid39554176, year = {2024}, author = {Dobhal, S and Hugouvieux-Cotte-Pattat, N and Arizala, D and Sari, GB and Chuang, SC and Alvarez, AM and Arif, M}, title = {Dickeya ananae sp. nov., pectinolytic bacterium isolated from pineapple (Ananas comosus).}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.10.29.620964}, pmid = {39554176}, issn = {2692-8205}, abstract = {Recently, species clustering within Dickeya zeae has been identified as complex, encompassing validly published names, including D. oryzae and D. parazeae, with some strains potentially delineating new species. 
In this study, genomes of strains isolated from a bacterial heart rot outbreak in pineapple (Ananas comosus var. comosus) on Oahu, Hawaii, along with two strains from pineapple in Malaysia, were sequenced. Orthologous average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values among the sequenced genomes ranged from 98.93-99.9% and 91.8-99.9%, respectively, supporting the classification of seven strains within the same species. Comparisons of ANI and dDDH values between these seven strains and type strains of D. zeae, D. parazeae, and D. oryzae ranged from 94.4-95.9% and 57.2-66.5%, respectively. These values fall below the proposed boundaries for new species designation, supporting the delineation of a novel species. Phylogenetic analyses, including 16S rRNA, gapA , multi-locus sequence analysis (MLSA) of 10 housekeeping genes, whole-genome, and pangenome analyses, were concordant and revealed a distinct monophyletic clade, separating these strains from other members of the D. zeae complex, with D. oryzae as the closest relative. Notably, a nitrogen fixation gene cluster comprising 28 genes, similar to the Klebsiella spp. nitrogenase gene cluster, was found in the genome of the seven pineapple strains. Based on polyphasic approaches, including ANI, dDDH, biochemical, physiological, and phylogenomic analyses, we propose the reclassification in a new species of the five pineapple strains from Hawaii A5391, A5410 [T] , A5611, A6136, and A6137, together with the two pineapple strains from Malaysia CFBP 1272 and CFBP 1278, previously classified as D. zeae . We propose the name Dickeya ananae sp. nov. 
for this taxon, represented by the type strain A5410 [T] (= ICMP 25020 [T] = LMG 33197 [T]).}, } @article {pmid39554168, year = {2024}, author = {Chandra, G and Hossen, MH and Scholz, S and Dilthey, AT and Gibney, D and Jain, C}, title = {Integer programming framework for pangenome-based genome inference.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.10.27.620212}, pmid = {39554168}, issn = {2692-8205}, abstract = {UNLABELLED: Affordable genotyping methods are essential in genomics. Commonly used genotyping methods primarily support single nucleotide variants and short indels but neglect structural variants. Additionally, accuracy of read alignments to a reference genome is unreliable in highly polymorphic and repetitive regions, further impacting genotyping performance. Recent works highlight the advantage of haplotype-resolved pangenome graphs in addressing these challenges. Building on these developments, we propose a rigorous alignment-free genotyping framework. Our formulation seeks a path through the pangenome graph that maximizes the matches between the path and substrings of sequencing reads (e.g., k -mers) while minimizing recombination events (haplotype switches) along the path. We prove that this problem is NP-Hard and develop efficient integer-programming solutions. We benchmarked the algorithm using downsampled short-read datasets from homozygous human cell lines with coverage ranging from 0.1× to 10×. Our algorithm accurately estimates complete major histocompatibility complex (MHC) haplotype sequences with small edit distances from the ground-truth sequences, providing a significant advantage over existing methods on low-coverage inputs. Although our algorithm is designed for haploid samples, we discuss future extensions to diploid samples.

IMPLEMENTATION: https://github.com/at-cg/PHI.}, } @article {pmid39554056, year = {2024}, author = {Brown, NK and Shivakumar, VS and Langmead, B}, title = {Improved pangenomic classification accuracy with chain statistics.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.10.29.620953}, pmid = {39554056}, issn = {2692-8205}, abstract = {Compressed full-text indexes enable efficient sequence classification against a pangenome or tree-of-life index. Past work on compressed-index classification used matching statistics or pseudo-matching lengths to capture the fine-grained co-linearity of exact matches. But these fail to capture coarse-grained information about whether seeds appear co-linearly in the reference. We present a novel approach that additionally obtains coarse-grained co-linearity ("chain") statistics. We do this without using a chaining algorithm, which would require superlinear time in the number of matches. We start with a collection of strings, avoiding the multiple-alignment step required by graph approaches. We rapidly compute multi-maximal unique matches (multi-MUMs) and identify BWT sub-runs that correspond to these multi-MUMs. From these, we select those that can be "tunneled," and mark these with the corresponding multi-MUM identifiers. This yields an O(r + n/d)-space index for a collection of d sequences having a length-n BWT consisting of r maximal equal-character runs. Using the index, we simultaneously compute fine-grained matching statistics and coarse-grained chain statistics in linear time with respect to query length. 
We found that this substantially improves classification accuracy compared to past compressed-indexing approaches and reaches the same level of accuracy as less efficient alignment-based methods.}, } @article {pmid39552065, year = {2024}, author = {Matthews, CA and Watson-Haigh, NS and Burton, RA and Sheppard, AE}, title = {A gentle introduction to pangenomics.}, journal = {Briefings in bioinformatics}, volume = {25}, number = {6}, pages = {}, doi = {10.1093/bib/bbae588}, pmid = {39552065}, issn = {1477-4054}, mesh = {*Genomics/methods ; Humans ; Genome ; }, abstract = {Pangenomes have emerged in response to limitations associated with traditional linear reference genomes. In contrast to a traditional reference that is (usually) assembled from a single individual, pangenomes aim to represent all of the genomic variation found in a group of organisms. The term 'pangenome' is currently used to describe multiple different types of genomic information, and limited language is available to differentiate between them. This is frustrating for researchers working in the field and confusing for researchers new to the field. 
Here, we provide an introduction to pangenomics relevant to both prokaryotic and eukaryotic organisms and propose a formalization of the language used to describe pangenomes (see the Glossary) to improve the specificity of discussion in the field.}, } @article {pmid39548374, year = {2024}, author = {Heppert, JK and Awori, RM and Cao, M and Chen, G and McLeish, J and Goodrich-Blair, H}, title = {Analyses of Xenorhabdus griffiniae genomes reveal two distinct sub-species that display intra-species variation due to prophages.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {1087}, pmid = {39548374}, issn = {1471-2164}, mesh = {*Xenorhabdus/genetics/classification ; *Prophages/genetics ; *Genome, Bacterial ; *Phylogeny ; Symbiosis ; Animals ; Genomics/methods ; Genetic Variation ; }, abstract = {BACKGROUND: Nematodes of the genus Steinernema and their Xenorhabdus bacterial symbionts are lethal entomopathogens that are useful in the biocontrol of insect pests, as sources of diverse natural products, and as research models for mutualism and parasitism. Xenorhabdus play a central role in all aspects of the Steinernema lifecycle, and a deeper understanding of their genomes therefore has the potential to spur advances in each of these applications.

RESULTS: Here, we report a comparative genomics analysis of Xenorhabdus griffiniae, including the symbiont of Steinernema hermaphroditum nematodes, for which genetic and genomic tools are being developed. We sequenced and assembled circularized genomes for three Xenorhabdus strains: HGB2511, ID10 and TH1. We then determined their relationships to other Xenorhabdus and delineated their species via phylogenomic analyses, concluding that HGB2511 and ID10 are Xenorhabdus griffiniae while TH1 is a novel species. These additions to the existing X. griffiniae landscape further allowed for the identification of two subspecies within the clade. Consistent with other Xenorhabdus, the analysed X. griffiniae genomes each encode a wide array of antimicrobials and virulence-related proteins. Comparative genomic analyses, including the creation of a pangenome, revealed that a large amount of the intraspecies variation in X. griffiniae is contained within the mobilome and attributable to prophage loci. In addition, CRISPR arrays, secondary metabolite potential and toxin genes all varied among strains within the X. griffiniae species.

CONCLUSIONS: Our findings suggest that phage-related genes drive the genomic diversity in closely related Xenorhabdus symbionts, and that these may underlie some of the traits most associated with the lifestyle and survival of entomopathogenic nematodes and their bacteria: virulence and competition. This study establishes a broad knowledge base for further exploration of not only the relationships between X. griffiniae species and their nematode hosts but also the molecular mechanisms that underlie their entomopathogenic lifestyle.}, } @article {pmid39547942, year = {2024}, author = {Zhang, LJ and Wang, N and Huang, W and Wu, LY and Song, B and Wang, SL and Sheng, JD and Wang, W}, title = {Genome-based analysis of biosynthetic potential from antimycotic Streptomyces rochei strain A144.}, journal = {FEMS microbiology letters}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsle/fnae097}, pmid = {39547942}, issn = {1574-6968}, abstract = {Streptomyces rochei is a species of Streptomyces with a diverse range of biological activities. S. rochei strain A144 was isolated from desert soils and exhibits antagonistic activity against several plant pathogenic fungi. The genome of S. rochei A144 was sequenced and revealed the presence of one linear chromosome and one plasmid. The chromosome length was found to be 8,085,429 bp, with a GC content of 72.62%, while the Plas1 length was 177,399 bp, with a GC content of 69.08%. Comparative genomics was employed to analyse the S. rochei group. There is a high degree of collinearity between the genomes of S. rochei strains. Based on pan-genome analysis, S. rochei has 10,315 gene families, including 4051 core and 2322 unique genes. AntiSMASH was used to identify the gene clusters for secondary metabolites, identifying 33 secondary metabolite genes on the A144 genome. 
Among them, 18 clusters were found to be >70% identical to known biosynthetic gene clusters (BGCs), indicating that A144 has the potential to synthesize secondary metabolites. The majority of the BGCs were found to be conserved within the S. rochei group, including those encoding polyketide synthases (PKS), terpenes, non-ribosomal peptide synthetases (NRPS), other ribosomally synthesised and post-translationally modified peptides (RiPP), nicotianamine-iron transporters, lanthipeptides, and a few other types. The S. rochei group can be a potential genetic source of useful secondary metabolites with applications in medicine and biotechnology.}, } @article {pmid39546548, year = {2024}, author = {Foo, A and Brettell, LE and Nichols, HL and Medina Muñoz, M and Lysne, JA and Dhokiya, V and Hoque, AF and Brackney, DE and Caragata, EP and Hutchinson, ML and Jacobs-Lorena, M and Lampe, DJ and Martin, E and Valiente Moro, C and Povelones, M and Short, SM and Steven, B and Xu, J and Paustian, TD and Rondon, MR and Hughes, GL and Coon, KL and Heinz, E}, title = {MosAIC: An annotated collection of mosquito-associated bacteria with high-quality genome assemblies.}, journal = {PLoS biology}, volume = {22}, number = {11}, pages = {e3002897}, doi = {10.1371/journal.pbio.3002897}, pmid = {39546548}, issn = {1545-7885}, abstract = {Mosquitoes transmit medically important human pathogens, including viruses like dengue virus and parasites such as Plasmodium spp., the causative agent of malaria. Mosquito microbiomes are critically important for the ability of mosquitoes to transmit disease-causing agents. However, while large collections of bacterial isolates and genomic data exist for vertebrate microbiomes, the vast majority of work in mosquitoes to date is based on 16S rRNA gene amplicon data that provides limited taxonomic resolution and no functional information. 
To address this gap and facilitate future studies using experimental microbiome manipulations, we generated a bacterial Mosquito-Associated Isolate Collection (MosAIC) consisting of 392 bacterial isolates with extensive metadata and high-quality draft genome assemblies that are publicly available, both isolates and sequence data, for use by the scientific community. MosAIC encompasses 142 species spanning 29 bacterial families, with members of the Enterobacteriaceae comprising 40% of the collection. Phylogenomic analysis of 3 genera, Enterobacter, Serratia, and Elizabethkingia, reveal lineages of mosquito-associated bacteria isolated from different mosquito species in multiple laboratories. Investigation into species' pangenomes further reveals clusters of genes specific to these lineages, which are of interest for future work to test for functions connected to mosquito host association. Altogether, we describe the generation of a physical collection of mosquito-associated bacterial isolates, their genomic data, and analyses of selected groups in context of genome data from closely related isolates, providing a unique, highly valuable resource for research on bacterial colonisation and adaptation within mosquito hosts. 
Future efforts will expand the collection to include broader geographic and host species representation, especially from individuals collected from field populations, as well as other mosquito-associated microbes, including fungi, archaea, and protozoa.}, } @article {pmid39545504, year = {2024}, author = {Bird, KA and Brock, JR and Grabowski, PP and Harder, AM and Healy, A and Shu, S and Barry, K and Boston, L and Daum, C and Guo, J and Lipzen, A and Walstead, R and Grimwood, J and Schmutz, J and Lu, C and Comai, L and McKay, JK and Pires, JC and Edger, PP and Lovell, JT and Kliebenstein, DJ}, title = {Allopolyploidy expanded gene content but not pangenomic variation in the hexaploid oilseed Camelina sativa.}, journal = {Genetics}, volume = {}, number = {}, pages = {}, doi = {10.1093/genetics/iyae183}, pmid = {39545504}, issn = {1943-2631}, abstract = {Ancient whole-genome duplications (WGDs) are believed to facilitate novelty and adaptation by providing the raw fuel for new genes. However, it is unclear how recent WGDs may contribute to evolvability within recent polyploids. Hybridization accompanying some WGDs may combine divergent gene content among diploid species. Some theory and evidence suggest that polyploids have a greater accumulation and tolerance of gene presence-absence and genomic structural variation, but it is unclear to what extent either is true. To test how recent polyploidy may influence pangenomic variation, we sequenced, assembled, and annotated twelve complete, chromosome-scale genomes of Camelina sativa, an allohexaploid biofuel crop with three distinct subgenomes. Using pangenomic comparative analyses, we characterized gene presence-absence and genomic structural variation both within and between the subgenomes. We found over 75% of ortholog gene clusters are core in Camelina sativa and <10% of sequence space was affected by genomic structural rearrangements. 
In contrast, 19% of gene clusters were unique to one subgenome, and the majority of these were Camelina-specific (no ortholog in Arabidopsis). We identified an inversion that may contribute to vernalization requirements in winter-type Camelina, and an enrichment of Camelina-specific genes with enzymatic processes related to seed oil quality and Camelina's unique glucosinolate profile. Genes related to these traits exhibited little presence-absence variation. Our results reveal minimal pangenomic variation in this species, and instead show how hybridization accompanied by WGD may benefit polyploids by merging diverged gene content of different species.}, } @article {pmid39544963, year = {2024}, author = {Wong, HL and Bulzu, PA and Ghai, R and Chiriac, MC and Salcher, MM}, title = {Ubiquitous genome streamlined Acidobacteriota in freshwater environments.}, journal = {ISME communications}, volume = {4}, number = {1}, pages = {ycae124}, pmid = {39544963}, issn = {2730-6151}, abstract = {Acidobacteriota are abundant in soil, peatlands, and sediments, but their ecology in freshwater environments remains understudied. UBA12189, an Acidobacteriota genus, is an uncultivated, genome-streamlined lineage with a small genome size found in aquatic environments where detailed genomic analyses are lacking. Here, we analyzed 66 MAGs of UBA12189 (including one complete genome) from freshwater lakes and rivers in Europe, North America, and Asia. UBA12189 has small genome sizes (<1.4 Mbp), low GC content, and a highly diverse pangenome. In freshwater lakes, this bacterial lineage is abundant from the surface waters (epilimnion) down to a 300-m depth (hypolimnion). UBA12189 appears to be free-living from CARD-FISH analysis. 
When compared to other genome-streamlined bacteria such as Nanopelagicales and Methylopumilus, genome reduction has caused UBA12189 to have a more limited metabolic repertoire in carbon, sulfur, and nitrogen metabolisms, limited numbers of membrane transporters, as well as a higher degree of auxotrophy for various amino acids, vitamins, and reduced sulfur. Despite having reduced genomes, UBA12189 encodes proteorhodopsin, complete biosynthesis pathways for heme and vitamin K2, cbb3-type cytochrome c oxidases, and heme-requiring enzymes. These genes may give a selective advantage during the genome streamlining process. We propose the new genus Acidiparvus, with two new species named "A. lacustris" and "A. fluvialis". Acidiparvus is the first described genome-streamlined lineage under the phylum Acidobacteriota, which is a free-living, slow-growing scavenger in freshwater environments.}, } @article {pmid39537924, year = {2024}, author = {Jayakodi, M and Lu, Q and Pidon, H and Rabanus-Wallace, MT and Bayer, M and Lux, T and Guo, Y and Jaegle, B and Badea, A and Bekele, W and Brar, GS and Braune, K and Bunk, B and Chalmers, KJ and Chapman, B and Jørgensen, ME and Feng, JW and Feser, M and Fiebig, A and Gundlach, H and Guo, W and Haberer, G and Hansson, M and Himmelbach, A and Hoffie, I and Hoffie, RE and Hu, H and Isobe, S and König, P and Kale, SM and Kamal, N and Keeble-Gagnère, G and Keller, B and Knauft, M and Koppolu, R and Krattinger, SG and Kumlehn, J and Langridge, P and Li, C and Marone, MP and Maurer, A and Mayer, KFX and Melzer, M and Muehlbauer, GJ and Murozuka, E and Padmarasu, S and Perovic, D and Pillen, K and Pin, PA and Pozniak, CJ and Ramsay, L and Pedas, PR and Rutten, T and Sakuma, S and Sato, K and Schüler, D and Schmutzer, T and Scholz, U and Schreiber, M and Shirasawa, K and Simpson, C and Skadhauge, B and Spannagl, M and Steffenson, BJ and Thomsen, HC and Tibbits, JF and Nielsen, MTS and Trautewig, C and Vequaud, D and Voss, C and Wang, P and 
Waugh, R and Westcott, S and Rasmussen, MW and Zhang, R and Zhang, XQ and Wicker, T and Dockter, C and Mascher, M and Stein, N}, title = {Structural variation in the pangenome of wild and domesticated barley.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {39537924}, issn = {1476-4687}, abstract = {Pangenomes are collections of annotated genome sequences of multiple individuals of a species[1]. The structural variants uncovered by these datasets are a major asset to genetic analysis in crop plants[2]. Here we report a pangenome of barley comprising long-read sequence assemblies of 76 wild and domesticated genomes and short-read sequence data of 1,315 genotypes. An expanded catalogue of sequence variation in the crop includes structurally complex loci that are rich in gene copy number variation. To demonstrate the utility of the pangenome, we focus on four loci involved in disease resistance, plant architecture, nutrient release and trichome development. Novel allelic variation at a powdery mildew resistance locus and population-specific copy number gains in a regulator of vegetative branching were found. Expansion of a family of starch-cleaving enzymes in elite malting barleys was linked to shifts in enzymatic activity in micro-malting trials. Deletion of an enhancer motif is likely to change the developmental trajectory of the hairy appendages on barley grains. 
Our findings indicate that allelic diversity at structurally complex loci may have helped crop plants to adapt to new selective regimes in agricultural ecosystems.}, } @article {pmid39535885, year = {2024}, author = {Du, H and Zhuo, Y and Lu, S and Li, W and Zhou, L and Sun, F and Liu, G and Liu, JF}, title = {Pangenome Reveals Gene Content Variations and Structural Variants Contributing to Pig Characteristics.}, journal = {Genomics, proteomics \& bioinformatics}, volume = {}, number = {}, pages = {}, doi = {10.1093/gpbjnl/qzae081}, pmid = {39535885}, issn = {2210-3244}, abstract = {Pigs are among the most essential sources of high-quality protein in human diets. Structural variants (SVs) are a major source of genetic variants associated with diverse traits and evolutionary events. However, the current linear reference genome of pigs limits the presentation of position information for SVs. In this study, we generated a pangenome of pigs and a genome variation map of 599 deep-sequenced genomes across Eurasia. Moreover, a section-wide gene repertoire was constructed, which indicated that core genes were more evolutionarily conserved than variable genes. Subsequently, we identified 546,137 SVs, their enrichment regions, and relationships with genomic features and found significant divergence across Eurasian pigs. More importantly, the pangenome-detected SVs could complement heritability estimates and genome-wide association studies based only on single nucleotide polymorphisms. Among the SVs shaped by selection, we identified an insertion in the promoter region of the TBX19 gene, which may be related to the development, growth, and timidity traits of Asian pigs and may affect the gene expression. 
Our constructed pig pangenome and the identified SVs provide rich resources for future functional genomic research on pigs.}, } @article {pmid39535230, year = {2024}, author = {van Westerhoven, AC and Dijkstra, J and Aznar Palop, JL and Wissink, K and Bell, J and Kema, GHJ and Seidl, MF}, title = {Frequent genetic exchanges revealed by a pan-mitogenome graph of a fungal plant pathogen.}, journal = {mBio}, volume = {}, number = {}, pages = {e0275824}, doi = {10.1128/mbio.02758-24}, pmid = {39535230}, issn = {2150-7511}, abstract = {Mitochondria are present in almost all eukaryotic lineages. The mitochondrial genomes (mitogenomes) evolve separately from nuclear genomes, and they can therefore provide relevant insights into the evolution of their host species. Fusarium oxysporum is a major fungal plant pathogen that is assumed to reproduce clonally. However, horizontal chromosome transfer between strains can occur through heterokaryon formation, and recently, signs of sexual recombination have been observed. Similarly, signs of recombination in F. oxysporum mitogenomes challenged the prevailing assumption of clonal reproduction in this species. Here, we construct, to our knowledge, the first fungal pan-mitogenome graph of nearly 500 F. oxysporum mitogenome assemblies to uncover the variation and evolution. In general, the gene order of fungal mitogenomes is not well conserved, yet the mitogenome of F. oxysporum and related species are highly colinear. We observed two strikingly contrasting regions in the F. oxysporum pan-mitogenome, comprising a highly conserved core mitogenome and a long variable region (6-16 kb in size), of which we identified three distinct types. The pan-mitogenome graph reveals that only five intron insertions occurred in the core mitogenome and that the long variable regions drive the difference between mitogenomes. Moreover, we observed that their evolution is neither concurrent with the core mitogenome nor with the nuclear genome. 
Our large-scale analysis of long variable regions uncovers frequent recombination between mitogenomes, even between strains that belong to different taxonomic clades. This challenges the common assumption of incompatibility between genetically diverse F. oxysporum strains and provides new insights into the evolution of this fungal species. IMPORTANCE: Insights into plant pathogen evolution is essential for the understanding and management of disease. Fusarium oxysporum is a major fungal pathogen that can infect many economically important crops. Pathogenicity can be transferred between strains by the horizontal transfer of pathogenicity chromosomes. The fungus has been thought to evolve clonally, yet recent evidence suggests active sexual recombination between related isolates, which could at least partially explain the horizontal transfer of pathogenicity chromosomes. By constructing a pan-genome graph of nearly 500 mitochondrial genomes, we describe the genetic variation of mitochondria in unprecedented detail and demonstrate frequent mitochondrial recombination. Importantly, recombination can occur between genetically diverse isolates from distinct taxonomic clades and thus can shed light on genetic exchange between fungal strains.}, } @article {pmid39535198, year = {2024}, author = {Davis, SC and Cerra, J and Williams, LE}, title = {Comparative genomics of obligate predatory bacteria belonging to phylum Bdellovibrionota highlights distribution and predicted functions of lineage-specific protein families.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0068024}, doi = {10.1128/msphere.00680-24}, pmid = {39535198}, issn = {2379-5042}, abstract = {Comparative genomics of predatory bacteria is important to understand their ecology and evolution and explore their potential to treat drug-resistant infections. We compared chromosomes of 18 obligate predators from phylum Bdellovibrionota (16 intraperiplasmic, two epibiotic) and 15 non-predatory bacteria. 
Phylogenetics of conserved single-copy genes and analysis of genome-wide average amino acid identity provide evidence for at least five Bdellovibrio species and support recent reclassifications of predatory taxa. To define shared and differential genome content, we grouped predicted protein sequences into gene clusters based on sequence similarity. Few gene clusters are shared by all 33 bacteria or all 18 predatory bacteria; however, we identified gene clusters conserved within lineages, such as intraperiplasmic Bdellovibrio, and not found in other bacteria. Many of these are predicted to function in cell envelope biogenesis, signal transduction, and other roles important for predatory lifestyles. Among intraperiplasmic Bdellovibrio, we detected high abundance of gene clusters predicted to encode transglycosylases, endopeptidases, and lysozymes, and we identified six gene clusters (amidase, L,D-transpeptidase, four transglycosylases) with evidence of recent gene duplication and gene family expansion. Focusing on peptidoglycan metabolism, we defined a suite of gene clusters that include peptidoglycan-degrading and -modifying enzymes and occur only in predatory bacteria, suggesting these proteins may have evolved activities specific to predation. Our analyses highlight key genome content differences between obligate predatory bacteria and non-predatory relatives and identify gene clusters that may encode enzymes adapted to predatory lifestyles. These lineage-specific proteins are strong candidates for functional characterization to clarify their role in predation. IMPORTANCE: Evolution of predation as a bacterial lifestyle involves selective pressure on and adaptation of enzymes that contribute to killing and digestion of prey bacteria, in some cases from within the prey itself. Such enzymes are a hallmark of obligate predatory bacteria belonging to phylum Bdellovibrionota, which includes the well-studied predator Bdellovibrio. 
By comparing protein sequences of obligate predatory bacteria and their non-predatory relatives, we define key genome content differences that distinguish bacterial predators and identify lineage-specific enzymes that may have evolved unique activities due to selective pressures related to a predatory lifestyle. In addition to providing insights into the ecology and evolution of predatory bacteria, comparative genomics studies, like this, can inform efforts to develop predatory bacteria and/or their enzymes as potential biocontrol agents to combat drug-resistant bacterial infections.}, } @article {pmid39533060, year = {2024}, author = {Aguirre-Sánchez, JR and Chaidez, C and Castro-Del Campo, N}, title = {The pangenome analysis of the environmental source Salmonella enterica highlights a diverse accessory genome and a distinct serotype clustering.}, journal = {FEMS microbiology letters}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsle/fnae090}, pmid = {39533060}, issn = {1574-6968}, abstract = {Salmonella remains the leading cause of foodborne infections globally. Environmental reservoirs, particularly aquatic bodies, serve as conduits for the fecal-oral transmission of this pathogen. While the gastrointestinal tract is traditionally considered the primary habitat of Salmonella, mounting evidence suggests the bacterium's capacity for survival in external environments. The application of advanced technological platforms, such as next-generation sequencing (NGS), facilitates a comprehensive analysis of Salmonella's genomic features. This study aims to characterize the genomic composition of Salmonella isolates from river water, contributing to a potential paradigm shift and advancing public health protection. A total of 25 river water samples were collected and processed, followed by microbiological isolation of Salmonella strains, which were then sequenced. Genomic characterization revealed adaptive mechanisms, including gene duplication. 
Furthermore, an open pangenome, predisposed to incorporating foreign genetic material, was identified. Notably, antibiotic resistance genes were found to be part of the core genome, challenging previous reports that placed them in the accessory genome.}, } @article {pmid39532890, year = {2024}, author = {Zhang, M and Adroub, S and Ummels, R and Asaad, M and Song, L and Pain, A and Bitter, W and Guan, Q and Abdallah, AM}, title = {Comprehensive pan-genome analysis of Mycobacterium marinum: insights into genomic diversity, evolution, and pathogenicity.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {27723}, pmid = {39532890}, issn = {2045-2322}, support = {2024B20//The Bethune Project of Jilin University/ ; 23YQ10//Science and Technology Development Project/ ; 04045970001//Research start-up funds/ ; }, mesh = {*Mycobacterium marinum/genetics/pathogenicity ; *Genome, Bacterial ; *Phylogeny ; *Genetic Variation ; *Evolution, Molecular ; Humans ; Genomics/methods ; Virulence Factors/genetics ; Whole Genome Sequencing/methods ; Virulence/genetics ; Mycobacterium Infections, Nontuberculous/microbiology ; }, abstract = {Mycobacteria is a diverse genus that includes both innocuous environmental species and serious pathogens like Mycobacterium tuberculosis, Mycobacterium leprae, and Mycobacterium ulcerans, the causative agents of tuberculosis, leprosy, and Buruli ulcer, respectively. This study focuses on Mycobacterium marinum, a closely related species known for its larger genome and ability to infect ectothermic species and cooler human extremities. Utilizing whole-genome sequencing, we conducted a comprehensive pan-genome analysis of 100 M. marinum strains, exploring genetic diversity and its impact on pathogenesis and host specificity. Our findings highlight significant genomic diversity, with clear distinctions in core, dispensable, and unique genes among the isolates. 
Phylogenetic analysis revealed a broad distribution of genetic lineages, challenging previous classifications into distinct clusters. Additionally, we examined the synteny and diversity of the virulence factor CpnT, noting a wide range of C-terminal domain variations across strains, which points to potential adaptations in pathogenic mechanisms. This study enhances our understanding of M. marinum's genomic architecture and its evolutionary relationship with other mycobacterial pathogens, providing insights that could inform disease control strategies for M. tuberculosis and other mycobacteria.}, } @article {pmid39531493, year = {2024}, author = {Fang, B and Edwards, SV}, title = {Fitness consequences of structural variation inferred from a House Finch pangenome.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {121}, number = {47}, pages = {e2409943121}, doi = {10.1073/pnas.2409943121}, pmid = {39531493}, issn = {1091-6490}, support = {n/a//Harvard University (Harvard College)/ ; n/a//Harvard China Fund/ ; n/a//Harvard Global Institute (HGI)/ ; }, mesh = {Animals ; *Finches/genetics ; *Genomic Structural Variation ; *Genome/genetics ; Genetic Fitness/genetics ; Polymorphism, Single Nucleotide ; Evolution, Molecular ; Chromosome Inversion/genetics ; Haplotypes ; }, abstract = {Genomic structural variants (SVs) play a crucial role in adaptive evolution, yet their average fitness effects and characterization with pangenome tools are understudied in wild animal populations. We constructed a pangenome for House Finches (Haemorhous mexicanus), a model for studies of host-pathogen coevolution, using long-read sequence data on 16 individuals (32 de novo-assembled haplotypes) and one outgroup. We identified 887,118 SVs larger than 50 base pairs, mostly (60%) involving repetitive elements, with reduced SV diversity in the eastern US as a result of its introduction by humans. 
The distribution of fitness effects of genome-wide SVs was estimated using maximum likelihood approaches and revealed that SVs in both coding and noncoding regions were on average more deleterious than smaller indels or single nucleotide polymorphisms. The reference-free pangenome facilitated identification of a > 10-My-old, 11-megabase-long pericentric inversion on chromosome 1. We found that the genotype frequencies of the inversion, estimated from 135 birds widely sampled temporally and geographically, increased steadily over the 25 y since House Finches were first exposed to the bacterial pathogen Mycoplasma gallisepticum and showed signatures of balancing selection, capturing genes related to immunity and telomerase activity. We also observed shorter telomeres in populations with a greater number of years exposure to Mycoplasma. Our study illustrates the utility of long-read sequencing and pangenome methods for understanding wild animal populations, estimating fitness effects of genome-wide SVs, and advancing our understanding of adaptive evolution through structural variation.}, } @article {pmid39530695, year = {2024}, author = {Jin, Y and Gao, C and Teng, G and Zhou, Z and Zhou, W and Huang, M}, title = {Dissecting the genetic features and evolution of Staphylococcus aureus sequence type 88: a global perspective.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0114224}, doi = {10.1128/msystems.01142-24}, pmid = {39530695}, issn = {2379-5077}, abstract = {Staphylococcus aureus sequence type (ST) 88, encompassing both methicillin-resistant S. aureus (MRSA) and methicillin-sensitive S. aureus (MSSA) phenotypes, is globally prevalent and commonly associated with skin and soft tissue infections. Despite its widespread occurrence, comprehensive genomic studies on this clone remain scarce. In this study, we performed detailed genomic analyses on 130 ST88 isolates derived from severe bloodstream infections alongside 275 publicly available ST88 sequences. 
Our phylogenetic analysis identified four distinct clades, with evidence suggesting independent evolution and significant clonal expansion of ST88 in China, particularly within clade I, which appeared to have emerged circa 1964. We documented notable interregional, international, and even intercontinental transmission of ST88 isolates. Variability in the distribution of SCCmec and spa types was observed across clades. Our in silico analyses indicated distinct patterns in the distribution of resistance genes, virulence genes, and mobile genetic elements among the clades, with clade I notably harboring the highest prevalence of the intact sraP gene and an independently acquired novel prophage, φST88-1. Conversely, clade IV exhibited deletions within the sasC gene, with certain sub-clades lacking the sdrDE and fnbB genes, underscoring the superior adhesive capabilities of clade I. In vitro experiments confirmed enhanced biofilm formation in clade I isolates, although the levels of hemolysis and cytotoxicity were similar across clades. Pan-genome-wide association study revealed that core SNPs, rather than the accessory genome, are the primary contributors to the diversification of the ST88 clades. These findings enrich our understanding of the genetic foundations underpinning the transmission dynamics and phenotypic diversity of ST88 clones globally. IMPORTANCE: Understanding the evolution and transmission of Staphylococcus aureus ST88 clones is critically important due to their spread within food, hospital, and community environments, leading to significant health issues. Despite its prevalence, detailed genomic insights into ST88, particularly regarding its diversity and evolutionary dynamics, have been lacking. Our comprehensive genomic analysis of 130 ST88 isolates from severe bloodstream infections, alongside 275 sequences from public databases, significantly advances our understanding of this pathogen. 
We identified four distinct evolutionary clades, demonstrating the independent evolution and substantial clonal expansion of ST88 in China, as well as its ability to spread across regions and continents. The diversity among the isolates was evident in their unique profiles of SCCmec elements, antibiotic resistance genes, virulence genes, and mobile genetic elements. Our findings underscore the critical role of core genomic variations over accessory elements in driving the diversification of ST88. This enhanced understanding provides new insights that could inform more effective control strategies, crucial for developing interventions to combat the global spread of this formidable pathogen.}, } @article {pmid39528738, year = {2024}, author = {Snoeck, S and Johanndrees, O and Nürnberger, T and Zipfel, C}, title = {Plant pattern recognition receptors: from evolutionary insight to engineering.}, journal = {Nature reviews. Genetics}, volume = {}, number = {}, pages = {}, pmid = {39528738}, issn = {1471-0064}, abstract = {The plant immune system relies on germline-encoded pattern recognition receptors (PRRs) that sense foreign and plant-derived molecular patterns, and signal health threats. Genomic and pangenomic data sets provide valuable insights into the evolution of PRRs and their molecular triggers, which is furthering our understanding of plant-pathogen co-evolution and convergent evolution. Moreover, in silico and in vivo methods of PRR identification have accelerated the characterization of receptor-ligand complexes, and advances in protein structure prediction algorithms are revealing novel PRR sensor functions. Harnessing these recent advances to engineer PRRs presents an opportunity to enhance plant disease resistance against a broad spectrum of pathogens, enabling more sustainable agricultural practices. 
This Review summarizes both established and innovative approaches to leverage genomic data and translate resulting evolutionary insights into engineering PRR recognition specificities.}, } @article {pmid39526372, year = {2024}, author = {Liu, Z and Shen, S and Li, C and Zhang, C and Chen, X and Fu, Y and Yu, T and Zhou, R and Liu, D and Yang, QY and Song, X}, title = {SoIR: a comprehensive Solanaceae information resource for comparative and functional genomic study.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae1040}, pmid = {39526372}, issn = {1362-4962}, support = {C2022209010//Natural Science Fund for Distinguished Young Scholars of Hebei Province/ ; 2023YFF1002000//National Key Research and Development Program of China/ ; 32172583//National Natural Science Foundation of China/ ; 23372505D//S&T Program of Hebei/ ; H2023209084//Hebei Natural Science Foundation/ ; }, abstract = {The Solanaceae family, which includes economically important crops such as tomatoes, potatoes and peppers, has experienced a rapid expansion in genomic data due to advancements in sequencing technologies. However, existing databases are limited by incomplete species representation, a lack of comprehensive comparative genomic tools and the absence of systematic pan-genomic analyses. To address these gaps, we developed the Solanaceae Information Resource (SoIR, https://soir.bio2db.com), a comprehensive genomics database for the Solanaceae family. SoIR integrates genomic data from 81 species and transcriptomic data from 41 species, encompassing a total of 3 908 408 gene annotations derived from Gene Ontology, nonredundant protein, Pfam, Swiss-Prot and TrEMBL databases. The resource also includes 3 437 115 CRISPR guide sequences, 212 395 transcription factors and 19 086 genes associated with methylation modification. 
In addition to species-specific analyses, SoIR provides extensive bioinformatics tools for investigating gene family evolution, phylogenetic relationships and karyotype reconstruction across 25 fully sequenced genomes. With advanced tools such as Blast, Synteny and Sequence Alignment, the platform provides users with interactive and intuitive visualizations for conducting cross-species comparative genomics. As the first comprehensive pan-genomic resource for the entire Solanaceae family, SoIR facilitates in-depth cross-species analysis, supporting global research initiatives in plant evolution, functional genomics and crop improvement.}, } @article {pmid39526133, year = {2024}, author = {Binsker, U and Deneke, C and Hamid, HM and Gadicherla, AK and Göhler, A and Käsbohrer, A and Hammerl, JA}, title = {Genomic dissection of Escherichia marmotae provides insights into diversity and pathogenic potential.}, journal = {ISME communications}, volume = {4}, number = {1}, pages = {ycae126}, doi = {10.1093/ismeco/ycae126}, pmid = {39526133}, issn = {2730-6151}, abstract = {Anthropogenic activities enhance the interconnection of human, animal, and environmental habitats and drive the evolution and inter-niche transmission of bacteria. Clear identification of emerging bacteria and pathogen control is therefore a public health priority. In 2015, the novel Escherichia species Escherichia marmotae was assigned, but due to the lack of appropriate detection and typing technologies, the One Health impact of this species is still being unraveled. E. marmotae represents a missing link in the impact of Escherichia spp. Here, we report 25 E. marmotae identified by next-generation sequencing that were previously phenotypically characterized as Escherichia coli during national zoonosis monitoring of food-producing animals. Applying fastANI to 153 738 published Escherichia spp. genome assemblies, we identified further 124 E. marmotae, originally classified as E. coli. 
Phylogenomics of all 149 isolates reveals an undefined population structure that is independent of the ecological niche. We highlight the phenotypic, genomic, and plasmid diversity of E. marmotae and provide evidence for gene flow across the species. The latter is illustrated by the acquisition of antibiotic resistance plasmids and pathogenicity islands, such as the type III secretion system. Thus, our comprehensive genomic overview of an emerging potential opportunistic pathogen underlines the importance of improved detection and characterization.}, } @article {pmid39525087, year = {2024}, author = {Provatas, K and Chantzi, N and Patsakis, M and Nayak, A and Mouratidis, I and Georgakopoulos-Soares, I}, title = {Microsatellites explorer: A database of short tandem repeats across genomes.}, journal = {Computational and structural biotechnology journal}, volume = {23}, number = {}, pages = {3817--3826}, doi = {10.1016/j.csbj.2024.10.041}, pmid = {39525087}, issn = {2001-0370}, abstract = {Short tandem repeats (STRs) are widespread, repetitive elements, with a number of biological functions and are among the most rapidly mutating regions in the genome. Their distribution varies significantly between taxonomic groups in the tree of life and are highly polymorphic within the human population. Advances in sequencing technologies coupled with decreasing costs have enabled the generation of an ever-growing number of complete genomes. Additionally, the arrival of accurate long reads has facilitated the generation of Telomere-to-Telomere (T2T) assemblies of complete genomes. Nevertheless, there is no comprehensive database that encompasses the STRs found per genome across different organisms and for different human genomes across diverse ancestries. 
Here we introduce Microsatellites Explorer, a database of STRs found in the genomes of 117,253 organisms across all major taxonomic groups, 15 T2T genome assemblies of different organisms, and 94 human haplotypes from the human pangenome. The database currently hosts 406,758,798 STR sequences, serving as a centralized user-friendly repository to perform searches, interactive visualizations, and download existing STR data for independent analysis. Microsatellites Explorer is implemented as a web-portal for browsing, analyzing and downloading STR data. Microsatellites Explorer is publicly available at https://www.microsatellitesexplorer.com.}, } @article {pmid39521956, year = {2024}, author = {Tong, C and Jia, Y and Hu, H and Zeng, Z and Chapman, B and Li, C}, title = {Pangenome and pantranscriptome as the new reference for gene family characterisation - a case study of basic helix-loop-helix (bHLH) genes in barley.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101190}, doi = {10.1016/j.xplc.2024.101190}, pmid = {39521956}, issn = {2590-3462}, abstract = {Genome-wide identification and comparative gene family analyses have been commonly performed to investigate species-specific evolution linked to various traits and molecular pathways. However, most previous studies were limited to gene screening in a single reference genome, failing to account for the gene presence/absence variations (gPAVs) in a species. Here, we propose an innovative pangenome-based approach of gene family analyses based on orthologous gene groups (OGGs). Using the basic helix-loop-helix (bHLH) transcription factor family in barley as an example, we identified 161 ∼ 176 bHLHs in 20 barley genomes, which could be classified into 201 OGGs. These 201 OGGs were further classified into 140 core, 12 soft-core, 29 shell, and 20 line-specific/cloud bHLHs, revealing a complete profile of bHLH in barley. 
Using a genome-scan approach, we overcome the genome annotation bias and identified on average 1.5 un-annotated core bHLHs per barley genome. We found that all core bHLHs belong to whole genome/segmental duplicates whilst dispensable bHLHs were more likely to result from small scale duplication events. Interestingly, we noticed that the dispensable bHLHs tended to enrich in specific subfamilies SF13, SF27, and SF28, implying the potential biased expansion of specific bHLHs in barley. We found that 50% of the bHLHs contain at least one intact transposon element within the 2kb upstream-to-downstream region. bHLHs with CNV have 1.48 TEs on average, significantly higher than 1.36 for core bHLH without CNV, supporting TEs' potential role in bHLH expansion. Selection pressure analyses showed that dispensable bHLHs had experienced clear relaxed selection compared to core bHLHs, consistent with their conservation patterns. We further integrate pangenome with recently available barley pantranscriptome data in 5 tissues and discovered apparent transcriptional divergence within and across bHLH subfamilies. We conclude that pangenome-based gene family analyses can better describe the genuine evolution status of bHLHs untapped before and provided novel insights into bHLH evolution in barley. 
We expect this study will inspire similar analyses in many other gene families and species.}, } @article {pmid39519284, year = {2024}, author = {Żebracki, K and Koper, P and Wójcik, M and Marczak, M and Mazur, A}, title = {Transcriptomic Response of Rhizobium leguminosarum to Acidic Stress and Nutrient Limitation Is Versatile and Substantially Influenced by Extrachromosomal Gene Pool.}, journal = {International journal of molecular sciences}, volume = {25}, number = {21}, pages = {}, doi = {10.3390/ijms252111734}, pmid = {39519284}, issn = {1422-0067}, support = {ZB/2021/2//Institute of Biological Sciences of the Maria Curie-Skłodowska University in Lublin, Poland/ ; }, mesh = {*Rhizobium leguminosarum/genetics/metabolism ; *Stress, Physiological/genetics ; *Transcriptome ; *Gene Expression Regulation, Bacterial ; Replicon/genetics ; Hydrogen-Ion Concentration ; Genome, Bacterial ; Nutrients/metabolism ; Bacterial Proteins/genetics/metabolism ; Gene Expression Profiling ; }, abstract = {Multipartite genomes are thought to confer evolutionary advantages to bacteria by providing greater metabolic flexibility in fluctuating environments and enabling rapid adaptation to new ecological niches and stress conditions. This genome architecture is commonly found in plant symbionts, including nitrogen-fixing rhizobia, such as Rhizobium leguminosarum bv. trifolii TA1 (RtTA1), whose genome comprises a chromosome and four extrachromosomal replicons (ECRs). In this study, the transcriptomic responses of RtTA1 to partial nutrient limitation and low acidic pH were analyzed using high-throughput RNA sequencing. RtTA1 growth under these conditions resulted in the differential expression of 1035 to 1700 genes (DEGs), which were assigned to functional categories primarily related to amino acid and carbohydrate metabolism, ribosome and cell envelope biogenesis, signal transduction, and transcription. These results highlight the complexity of the bacterial response to stress. 
Notably, the distribution of DEGs among the replicons indicated that ECRs played a significant role in the stress response. The transcriptomic data align with the Rhizobium pangenome analysis, which revealed an over-representation of functional categories related to transport, metabolism, and regulatory functions on ECRs. These findings confirm that ECRs contribute substantially to the ability of rhizobia to adapt to challenging environmental conditions.}, } @article {pmid39519102, year = {2024}, author = {Yan, H and Du, M and Ding, J and Song, D and Ma, W and Li, Y}, title = {Pan-Genome-Wide Investigation and Co-Expression Network Analysis of HSP20 Gene Family in Maize.}, journal = {International journal of molecular sciences}, volume = {25}, number = {21}, pages = {}, doi = {10.3390/ijms252111550}, pmid = {39519102}, issn = {1422-0067}, support = {663/1121029//This research was funded by Talent Introduction Special Funds of Qingdao Agricultural University/ ; }, mesh = {*Zea mays/genetics/metabolism ; *Gene Expression Regulation, Plant ; *Phylogeny ; *Multigene Family ; *HSP20 Heat-Shock Proteins/genetics/metabolism ; *Genome, Plant ; *Plant Proteins/genetics/metabolism ; Gene Regulatory Networks ; Stress, Physiological/genetics ; Gene Expression Profiling ; }, abstract = {Heat shock protein 20 (HSP20) is a diverse and functionally important protein family that plays a crucial role in plants' tolerance to various abiotic stresses. In this study, we systematically analyzed the structural and functional characteristics of the HSP20 gene family within the Zea pan-genome. By identifying 56 HSP20 pan-genes, we revealed the variation in the number of these genes across different maize inbreds or relatives. Among those 56 genes, only 31 are present in more than 52 inbreds or relatives. 
Further phylogenetic analysis classified these genes into four major groups (Class A, B, C, D) and explored their diversity in subcellular localization, physicochemical properties, and the terminal structures of those HSP20s. Through collinearity analysis and Ka/Ks ratio calculations, we found that most HSP20 genes underwent purifying selection during maize domestication, although a few genes showed signs of positive selection pressure. Additionally, expression analysis showed that several HSP20 genes were significantly upregulated under high temperatures, particularly in tassels and leaves. Co-expression network analysis revealed that HSP20 genes were significantly enriched in GO terms related to environmental stress responses, suggesting that HSP20 genes not only play key roles in heat stress but may also be involved in regulating various other biological processes, such as secondary metabolism and developmental processes. These findings expand our understanding of the functions of the maize HSP20 family and provide new insights for further research into maize's response mechanisms to environmental stresses.}, } @article {pmid39513706, year = {2024}, author = {Lawal, OU and Bryan, N and Parreira, VR and Anderson, R and Chen, Y and Precious, M and Goodridge, L}, title = {Phylogenomics of novel clones of Aeromonas veronii recovered from a freshwater lake reveals unique biosynthetic gene clusters.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0117124}, doi = {10.1128/spectrum.01171-24}, pmid = {39513706}, issn = {2165-0497}, abstract = {UNLABELLED: Aquatic ecosystems serve as crucial reservoirs for pathogens and antimicrobial resistance genes, thus presenting a significant global health risk. Here, we investigated the phylogenomics of Aeromonas veronii from Lake Wilcox in Ontario. Among the 11 bacterial isolates, nine were identified as A. veronii. Notably, 67% of A. veronii isolates were potential human pathogens. 
Considerable genetic diversity was noted among the A. veronii isolates, suggesting the lake as a reservoir for multiple human pathogenic strains. Comparison of the A. veronii sequenced with global A. veronii genomes highlighted significant genetic diversity and suggests widespread dissemination of strains. All the isolates carried chromosomal genes encoding resistance to β-lactams. Although virulence gene content differed between human and non-human pathogenic strains, type III secretion systems was associated with human pathogenic isolates. The assessment of AMR genes in global isolates showed that β-lactam and tetracycline resistance genes were predominant. Although the machine learning-based pangenome-wide association approach performed did not yield any source-based genes, some genes were enriched in a few isolates from different sources. The mrkABCDF operon that mediates biofilm formation and genes encoding resistance to colistin, chloramphenicol, trimethoprim, and tetracycline were enriched in animal products, whereas macrolide resistance genes and Inc plasmid-types were linked to the aquatic environment. Novel biosynthetic gene clusters were identified, suggesting that A. veronii with varying pathogenic potential could produce unique secondary metabolites. There is a need for continuous tracking of pathogens in aquatic ecosystems to contribute to our understanding of their evolutionary dynamics and the ecological roles of their genetic elements.

IMPORTANCE: Lakes and other aquatic ecosystems can harbor harmful bacteria that can make people sick and resist antibiotics, posing a significant global health risk. In this study, we investigated Aeromonas veronii, a Gram-negative bacteria found in Lake Wilcox in Ontario. We used various techniques, including whole-genome sequencing (WGS), to analyze the bacteria and found that many of the isolates had the potential to cause human disease. We also discovered significant genetic diversity among the isolates, indicating that the lake may be a reservoir for multiple human pathogenic strains. All isolates carried genes that confer resistance to antibiotics, and some virulence genes were associated with human pathogenic isolates. This study highlights the importance of monitoring aquatic ecosystems for harmful bacteria to better understand their evolution, potential for human pathogenicity, and the ecological roles of their genetic elements. This knowledge can inform strategies for preventing the spread of antibiotic-resistant bacteria and protecting public health.}, } @article {pmid39512798, year = {2024}, author = {Verma, D and Satyanarayana, T and Dias, PJ}, title = {Editorial: Microbial comparative genomics and pangenomics: new tools, approaches and insights into gene and genome evolution.}, journal = {Frontiers in genetics}, volume = {15}, number = {}, pages = {1490645}, doi = {10.3389/fgene.2024.1490645}, pmid = {39512798}, issn = {1664-8021}, } @article {pmid39512589, year = {2024}, author = {Podbielski, A and Köller, T and Warnke, P and Barrantes, I and Kreikemeyer, B}, title = {Whole genome sequencing distinguishes skin colonizing from infection-associated Cutibacterium acnes isolates.}, journal = {Frontiers in cellular and infection microbiology}, volume = {14}, number = {}, pages = {1433783}, doi = {10.3389/fcimb.2024.1433783}, pmid = {39512589}, issn = {2235-2988}, mesh = {Humans ; *Whole Genome Sequencing ; *Genome, Bacterial ; *Skin/microbiology ; 
*Phylogeny ; Propionibacterium acnes/genetics/isolation & purification/classification ; Prospective Studies ; Male ; Genotype ; Adult ; Female ; Polymorphism, Single Nucleotide ; Gram-Positive Bacterial Infections/microbiology ; }, abstract = {INTRODUCTION: Cutibacterium acnes can both be a helpful colonizer of the human skin as well as the causative agent of acne and purulent infections. Until today, it is a moot point whether there are C. acnes strains exclusively devoted to be part of the skin microbiome and others, that carry special features enabling them to cause disease. So far, the search for the molecular background of such diverse behavior has led to inconsistent results.

METHODS: In the present study, we prospectively collected C. acnes strains from 27 infected persons and 18 healthy controls employing rigid selection criteria to ensure their role as infectious agent or colonizer. The genome sequences from these strains were obtained and carefully controlled for quality.

RESULTS: Deduced traditional phylotyping assigned almost all superficial isolates to type IA1, while the clinical strains were evenly distributed between types IA1, IB, and II. Single locus sequence typing (SLST) showed a predominance of A1 type for the control strains, whereas 56% of the clinical isolates belonged to types A1, H1 and K8. Pangenome analysis from all the present strains and 30 published genomes indicated the presence of an open pangenome. Except for three isolates, the colonizing strains clustered in clades separate from the majority of clinical strains, while 4 clinical strains clustered with the control strains. Identical results were obtained by a single nucleotide polymorphism (SNP) analysis. However, there were no significant differences in virulence gene contents in both groups.

DISCUSSION: Genome-wide association studies (GWAS) from both the pangenome and SNP data consistently showed genomic differences between both groups located in metabolic pathway and DNA repair genes. Thus, the different behavior of colonizing and infectious C. acnes strains could be due to special metabolic capacities or flexibilities rather than specific virulence traits.}, } @article {pmid39510980, year = {2024}, author = {Mohamedikbal, S and Al-Mamun, HA and Marsh, JI and Upadhyaya, S and Danilevicz, MF and Nguyen, HT and Valliyodan, B and Mahan, A and Batley, J and Edwards, D}, title = {Local haplotyping reveals insights into the genetic control of flowering time variation in wild and domesticated soybean.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20528}, doi = {10.1002/tpg2.20528}, pmid = {39510980}, issn = {1940-3372}, support = {//Australian Government and the Government of Western Australia/ ; DP200100762//Australian Research Council/ ; DP210100296//Australian Research Council/ ; //USDA Evans Allen Project/ ; }, abstract = {The timing of flowering in soybean [Glycine max (L.) Merr.], a key legume crop, is influenced by many factors, including daylight length or photoperiodic sensitivity, that affect crop yield, productivity, and geographical adaptation. Despite its importance, a comprehensive understanding of the local linkage landscape and allelic diversity within regions of the genome influencing flowering and contributing to phenotypic variation in subpopulations has been limited. This study addresses these gaps by conducting an in-depth trait association and linkage analysis coupled with local haplotyping using advanced bioinformatics tools, including crosshap, to characterize genomic variation using a pangenome dataset representing 915 domesticated and wild-type individuals. The association analysis identified eight significant loci on seven chromosomes. 
Moving beyond traditional association analysis, local haplotyping of targeted regions on chromosomes 6 and 20 identified distinct haplotype structures, variation patterns, and genomic candidates influencing flowering in subpopulations. These results suggest the action of a network of genomic candidates influencing flowering time and an untapped reservoir of genomic variation for this trait in wild germplasm. Notably, GlymaLee.20G147200 on chromosome 20 was identified as a candidate gene that may cause delayed flowering in soybean, potentially through histone modifications of floral repressor loci as seen in Arabidopsis thaliana (L.) Heynh. These findings support future functional validation of haplotype-based alleles for marker-assisted breeding and genomic selection to enhance latitude adaptability of soybean without compromising yield.}, } @article {pmid39510408, year = {2024}, author = {Ren, J and Kou, W and Xu, Y and Lu, M and Gong, M and Wang, X and Zhang, X and Liu, Z and Li, H and Yang, Q and Shah, AM and Zhu, F and Hou, ZC and Xu, N and Jiang, Y and Wang, F}, title = {Pan-genome analyses add {$\sim$}1000 genes to the ``complete'' genome assembly of chicken.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgg.2024.10.009}, pmid = {39510408}, issn = {1673-8527}, } @article {pmid39510377, year = {2024}, author = {Liu, F and Ma, XB and Han, B and Wang, B and Xu, JP and Cao, B and Ling, ZL and He, MQ and Zhu, XY and Zhao, RL}, title = {Pan-genome analysis reveals genomic variations during enoki mushroom domestication, with emphasis on genetic signatures of cap color and stipe length.}, journal = {Journal of advanced research}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jare.2024.11.005}, pmid = {39510377}, issn = {2090-1224}, abstract = {INTRODUCTION: The domestication of edible mushrooms, including Flammulina filiformis, offers valuable insights into the genetic changes driven by 
artificial selection. Understanding these changes is crucial for uncovering the mechanisms behind genome evolution in domesticated mushrooms.

OBJECTIVES: This study aims to investigate the population structure, genetic diversity, and domestication-related genomic changes in F. filiformis. By comparing the genome sequences of 199 wild and cultivated strains, we aim to elucidate the impact of domestication on F. filiformis.

METHODS: We performed de novo genome assembly and gene-based pan-genome analysis on the 199 strains, which included both wild and cultivated strains. We also conducted genome-wide association studies (GWAS) using presence-absence variation (PAV) and SNP data, combined with RNA sequencing, to identify genes associated with domestication traits, such as cap color and stipe length. Gene functional confirmation was achieved through genetic transformation experiments.

RESULTS: Our analysis grouped the strains into four distinct populations, which correlated with varying intensities of artificial selection. The three cultivated populations exhibited smaller genome sizes, fewer genes, lower genetic variation, reduced gene expression diversity, and lower heterozygosity compared to the wild population. The analysis revealed the loss of genes related to the beta-lactam antibiotic catabolic process and specific MAPK pathway genes during domestication, rendering domesticated strains more susceptible to diseases. Four genes closely associated with cap color and stipe length were identified, but genetic transformation experiments confirmed the functional relevance of only two (FfB and FfD) identified through PAV-based GWAS.

CONCLUSION: This study uncovered significant genomic variations between cultivated and wild F. filiformis populations, including the loss of pathogen resistance genes during domestication. We also identified key genes linked to cap color and stipe length, demonstrating for the first time the important role of PAV variation in mushroom domestication. These insights provide a foundation for future mushroom breeding and evolutionary research.}, } @article {pmid39508593, year = {2024}, author = {Tran, TH and Escapa, IF and Roberts, AQ and Gao, W and Obawemimo, AC and Segre, JA and Kong, HH and Conlan, S and Kelly, MS and Lemon, KP}, title = {Metabolic capabilities are highly conserved among human nasal-associated Corynebacterium species in pangenomic analyses.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0113224}, doi = {10.1128/msystems.01132-24}, pmid = {39508593}, issn = {2379-5077}, abstract = {UNLABELLED: Corynebacterium species are globally ubiquitous in human nasal microbiota across the lifespan. Moreover, nasal microbiota profiles typified by higher relative abundances of Corynebacterium are often positively associated with health. Among the most common human nasal Corynebacterium species are C. propinquum, C. pseudodiphtheriticum, C. accolens, and C. tuberculostearicum. To gain insight into the functions of these four species, we identified genomic, phylogenomic, and pangenomic properties and estimated the metabolic capabilities of 87 distinct human nasal Corynebacterium strain genomes: 31 from Botswana and 56 from the United States. C. pseudodiphtheriticum had geographically distinct clades consistent with localized strain circulation, whereas some strains from the other species had wide geographic distribution spanning Africa and North America. All species had similar genomic and pangenomic structures. 
Gene clusters assigned to all COG metabolic categories were overrepresented in the persistent versus accessory genome of each species indicating limited strain-level variability in metabolic capacity. Based on prevalence data, at least two Corynebacterium species likely coexist in the nasal microbiota of 82% of adults. So, it was surprising that core metabolic capabilities were highly conserved among the four species indicating limited species-level metabolic variation. Strikingly, strains in the U.S. clade of C. pseudodiphtheriticum lacked genes for assimilatory sulfate reduction present in most of the strains in the Botswana clade and in the other studied species, indicating a recent, geographically related loss of assimilatory sulfate reduction. Overall, the minimal species and strain variability in metabolic capacity implies coexisting strains might have limited ability to occupy distinct metabolic niches.

IMPORTANCE: Pangenomic analysis with estimation of functional capabilities facilitates our understanding of the full biologic diversity of bacterial species. We performed systematic genomic, phylogenomic, and pangenomic analyses with qualitative estimation of the metabolic capabilities of four common human nasal Corynebacterium species, along with focused experimental validations, generating a foundational resource. The prevalence of each species in human nasal microbiota is consistent with the common coexistence of at least two species. We identified a notably high level of metabolic conservation within and among species indicating limited options for species to occupy distinct metabolic niches, highlighting the importance of investigating interactions among nasal Corynebacterium species. Comparing strains from two continents, C. pseudodiphtheriticum had restricted geographic strain distribution characterized by an evolutionarily recent loss of assimilatory sulfate reduction in U.S. strains. Our findings contribute to understanding the functions of Corynebacterium within human nasal microbiota and to evaluating their potential for future use as biotherapeutics.}, } @article {pmid39506039, year = {2024}, author = {Griffiths, DB and Tiwari, RP and Murphy, DV and Scott, C}, title = {Comparative genomics of the highly halophilic Haloferacaceae.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {27025}, pmid = {39506039}, issn = {2045-2322}, mesh = {*Genomics/methods ; Genome, Archaeal ; Phylogeny ; Genetic Variation ; Secondary Metabolism/genetics ; Haloferax/genetics/metabolism ; }, abstract = {The Haloferacaceae are a family of extremely halophilic archaea with many species producing enzymes and products beneficial for industrial biotechnology. They are, however, relatively under-characterised with regards to genetics and gene products. 
This study aims to use existing sequence data to highlight genetic diversity, create pangenomes for three genera, and provide secondary metabolite and pathway analysis. This will establish current knowledge and identify key gaps in research. We show that the Haloferacaceae have significant genetic diversity between genera, with numerous gene gain and loss events in key genera. It also found that the model genus, Haloferax, has relatively low identified secondary metabolites compared to other genera within the family. Additionally, this study has identified potential biotechnology targets for heterologous expression in model organisms.}, } @article {pmid39505747, year = {2024}, author = {Hwang, CY and Cho, ES and Cha, IT and Lee, KE and Lee, EY and Seo, MJ}, title = {Genome-based classification of Halobellus rubicundus sp. nov., a novel extremely halophilic archaeon isolated from a Korean solar saltern.}, journal = {Extremophiles : life under extreme conditions}, volume = {28}, number = {3}, pages = {49}, pmid = {39505747}, issn = {1433-4909}, support = {NIBR202304104//National Institute of Biological Resources/ ; }, mesh = {*Genome, Archaeal ; *Phylogeny ; *Halobacteriaceae/genetics/classification/isolation & purification ; RNA, Ribosomal, 16S/genetics ; Republic of Korea ; Base Composition ; }, abstract = {A novel extremely halophilic archaeon designated, MBLA0158[T], was isolated from a solar saltern in Sorae, Republic of Korea. The colonies are red-pigmented, Gram-stain-negative, pleomorphic, non-motile, and lysed in distilled water. The strain grows at 25-45 °C (optimum, 37 °C), in 15-30% (w/v) NaCl (optimum, 20%) and 0.1-1.0 M Mg[2+] (optimum, 0.2-0.3 M) at pH 6.0-10.0 (optimum, 7.0-8.0). Comparative analysis based on the 16S rRNA gene sequence revealed that this strain is most closely related to the Halobellus inordinatus YC20[T] with a sequence identity of 96.0%. 
Strain MBLA0158[T] contained phosphatidylglycerol and phosphatidylglycerol phosphate methyl ester as major polar lipids. The genome size is 3.29 Mb and the DNA G + C content is 66.9 mol%. Phylogenomic analysis confirmed that strain MBLA0158[T] is distinct from previously reported type strains of the genus Halobellus. Pan-genome analysis showed that strain MBLA0158[T] contains 419 genes that are not present in other type strains of the genus Halobellus. Based on overall analyses, strain MBLA0158[T] is considered to represent a new species of the genus Halobellus, for which the name Halobellus rubicundus sp. nov. is proposed. The type strain is MBLA0158[T] (= KCTC 4318[T] = JCM 36642[T]).}, } @article {pmid39504240, year = {2024}, author = {Peng, Y and Mao, K and Zhang, Z and Ping, J and Jin, M and Liu, X and Wu, C and Zhao, C and Wang, P and Duan, X and Yu, S and Li, Z and Liu, J and Li, H and Yesaya, A and Chen, L and Wang, H and Wilson, K and Xiao, Y}, title = {Landscape of structural variants reveals insights for local adaptations in the Asian corn borer.}, journal = {Cell reports}, volume = {43}, number = {11}, pages = {114928}, doi = {10.1016/j.celrep.2024.114928}, pmid = {39504240}, issn = {2211-1247}, abstract = {Capturing the genetic diversity of different wild populations is crucial for unraveling the mechanisms of adaptation and establishing links between genome evolution and local adaptation. The Asian corn borer (ACB) moth has undergone natural selection during its adaptative evolution. However, structural variants (SVs), which play significant roles in these adaptation processes, have not been previously identified. Here, we constructed a multi-assembly graph pan-genome to highlight the importance of SVs in local adaptation. Our analysis revealed that the graph pan-genome contained 176.60 Mb (∼37.33%) of unique sequences. 
Subsequently, we performed an analysis of expression quantitative trait loci (QTLs) to explore the impact of SVs on gene expression regulation. Notably, through QTL mapping analysis, we identified the FTZ-F1 gene as a potential candidate gene associated with the traits of larval development rate. In sum, we explored the impact of SVs on the local adaptation of pests, therefore facilitating accelerated pest management strategies.}, } @article {pmid39500549, year = {2024}, author = {Rana, R and Sharma, A and Madhavan, VN and Korpole, S and Sonti, RV and Patel, HK and Patil, PB}, title = {Xanthomonas protegens sp. nov., a novel rice seed-associated bacterium, provides in vivo protection against X. oryzae pv. oryzae, the bacterial leaf blight pathogen.}, journal = {FEMS microbiology letters}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsle/fnae093}, pmid = {39500549}, issn = {1574-6968}, abstract = {Historically, Xanthomonas species are primarily known for their pathogenicity against plants, but recently, there have been more findings of non-pathogenic xanthomonads. In the present study, we report isolates from healthy rice seeds that belong to a new species, X. protegens, a protector of the rice plants against a serious pathogenic counterpart, i.e. X. oryzae pv. oryzae upon leaf clip co-inoculation. The new member species is non-pathogenic to rice and lacks a type III secretion system. The pangenome investigation revealed a large number of unique genes, including a novel lipopolysaccharide biosynthetic gene cluster, that might be important in its adaptation. The phylo-taxonogenomic analysis revealed that X. protegens is a taxonomic outlier species of X. sontii, a core, vertically transmitted rice seed endophyte with numerous probiotic properties. Interestingly, X. sontii is also reported as a keystone species of healthy rice seed microbiome. The findings and resources will help in the development of unique gene markers and evolutionary studies of X. 
sontii as a successful symbiont and X. oryzae as a serious pathogen. Here, we propose X. protegens sp. nov. as a novel species of the genus Xanthomonas with PPL118 = MTCC 13396 = CFBP 9164 = ICMP 25181 as the type strain. PPL117, PPL124, PPL125 and PPL126 are other strains of the species.}, } @article {pmid39497142, year = {2024}, author = {Koo, H and Morrow, CD}, title = {Shared and unique patterns of autonomous human endogenous retrovirus loci transcriptomes in CD14 + monocytes from individuals with physical trauma or infection with COVID-19.}, journal = {Retrovirology}, volume = {21}, number = {1}, pages = {17}, pmid = {39497142}, issn = {1742-4690}, mesh = {Humans ; *Endogenous Retroviruses/genetics ; *COVID-19/immunology/virology/genetics ; *Lipopolysaccharide Receptors/genetics ; *Monocytes/immunology/virology ; *Transcriptome ; *SARS-CoV-2/genetics/immunology ; *Wounds and Injuries/virology/immunology/genetics ; Immunity, Innate/genetics ; Male ; Female ; Genetic Loci ; }, abstract = {Since previous studies have suggested that the RNAs of human endogenous retrovirus (HERV) might be involved in regulating innate immunity, it is important to investigate the HERV transcriptome patterns in innate immune cell types such as CD14 + monocytes. Using single cell RNA-seq datasets from resting or stimulated PBMCs mapped to 3,220 known discrete autonomous proviral HERV loci, we found individual-specific variation in HERV transcriptomes between HERV loci in CD14 + monocytes. Analysis of paired datasets from the same individual that were cultured in vitro with LPS or without (i.e. control) revealed 36 HERV loci in CD14 + monocytes that were detected only after activation. To extend our analysis to in vivo activated CD14 + monocytes, we used two scRNA-seq datasets from studies that had demonstrated activation of circulating CD14 + monocytes in patients with physical trauma or patients hospitalized with COVID-19 infections. 
For direct comparison between the trauma and COVID-19 datasets, we first analyzed 1.625 billion sequence reads from a composite pangenome control of 21 normal individuals. Comparison of the sequence read depth of HERV loci in the trauma or COVID-19 samples to the pangenome control revealed that 39 loci in the COVID-19 and 11 HERV loci in the trauma samples were significantly different (Mann-Whitney U test), with 9 HERV loci shared between the COVID-19 and trauma datasets. The capacity to compare HERV loci transcriptome patterns in innate immune cells, like CD14 + monocytes, across different pathological conditions will lead to greater understanding of the physiological role of HERV expression in health and disease.}, } @article {pmid39497039, year = {2024}, author = {Avila Cartes, J and Bonizzoni, P and Ciccolella, S and Della Vedova, G and Denti, L}, title = {PangeBlocks: customized construction of pangenome graphs via maximal blocks.}, journal = {BMC bioinformatics}, volume = {25}, number = {1}, pages = {344}, pmid = {39497039}, issn = {1471-2105}, support = {956229//H2020 Marie Skłodowska-Curie Actions/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 872539//European Union's Horizon 2020 Research and Innovation Staff Exchange programme/ ; 872539//European Union's Horizon 2020 Research and Innovation Staff Exchange programme/ ; 872539//European Union's Horizon 2020 Research and Innovation Staff Exchange programme/ ; 872539//European Union's Horizon 2020 Research and Innovation Staff Exchange programme/ ; 2022YRB97K//Ministero dell'Istruzione, dell'Università e della Ricerca/ ; 2022YRB97K//Ministero dell'Istruzione, dell'Università e della Ricerca/ ; 2022YRB97K//Ministero dell'Istruzione, dell'Università e della Ricerca/ ; }, abstract = {BACKGROUND: The construction of a pangenome graph is a fundamental task in 
pangenomics. A natural theoretical question is how to formalize the computational problem of building an optimal pangenome graph, making explicit the underlying optimization criterion and the set of feasible solutions. Current approaches build a pangenome graph with some heuristics, without assuming some explicit optimization criteria. Thus it is unclear how a specific optimization criterion affects the graph topology and downstream analysis, like read mapping and variant calling.

RESULTS: In this paper, by leveraging the notion of maximal block in a Multiple Sequence Alignment (MSA), we reframe the pangenome graph construction problem as an exact cover problem on blocks called Minimum Weighted Block Cover (MWBC). Then we propose an Integer Linear Programming (ILP) formulation for the MWBC problem that allows us to study the most natural objective functions for building a graph. We provide an implementation of the ILP approach for solving the MWBC and we evaluate it on SARS-CoV-2 complete genomes, showing how different objective functions lead to pangenome graphs that have different properties, hinting that the specific downstream task can drive the graph construction phase.

CONCLUSION: We show that a customized construction of a pangenome graph based on selecting objective functions has a direct impact on the resulting graphs. In particular, our formalization of the MWBC problem, based on finding an optimal subset of blocks covering an MSA, paves the way to novel practical approaches to graph representations of an MSA where the user can guide the construction.}, } @article {pmid39496880, year = {2024}, author = {Liu, Z and Wang, N and Su, Y and Long, Q and Peng, Y and Shangguan, L and Zhang, F and Cao, S and Wang, X and Ge, M and Xue, H and Ma, Z and Liu, W and Xu, X and Li, C and Cao, X and Ahmad, B and Su, X and Liu, Y and Huang, G and Du, M and Liu, Z and Gan, Y and Sun, L and Fan, X and Zhang, C and Zhong, H and Leng, X and Ren, Y and Dong, T and Pei, D and Wu, X and Jin, Z and Wang, Y and Liu, C and Chen, J and Gaut, B and Huang, S and Fang, J and Xiao, H and Zhou, Y}, title = {Grapevine pangenome facilitates trait genetics and genomic breeding.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {39496880}, issn = {1546-1718}, abstract = {Grapevine breeding is hindered by a limited understanding of the genetic basis of complex agronomic traits. This study constructs a graph-based pangenome reference (Grapepan v.1.0) from 18 newly generated phased telomere-to-telomere assemblies and 11 published assemblies. Using Grapepan v.1.0, we build a variation map with 9,105,787 short variations and 236,449 structural variations (SVs) from the resequencing data of 466 grapevine cultivars. Integrating SVs into a genome-wide association study, we map 148 quantitative trait loci for 29 agronomic traits (50.7% newly identified), with 12 traits significantly contributed by SVs. The estimated heritability improves by 22.78% on average when including SVs. We discovered quantitative trait locus regions under divergent artificial selection in metabolism and berry development between wine and table grapes, respectively. 
Moreover, significant genetic correlations were detected among the 29 traits. Under a polygenic model, we conducted genomic predictions for each trait. In general, our study facilitates the breeding of superior cultivars via the genomic selection of multiple traits.}, } @article {pmid39496665, year = {2024}, author = {Kim, J and Kim, Y and Shin, J and Kim, YK and Lee, DH and Park, JW and Lee, D and Kim, HC and Lee, JH and Lee, SH and Kim, J}, title = {Fully phased genome assemblies and graph-based genetic variants of the olive flounder, Paralichthys olivaceus.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {1193}, pmid = {39496665}, issn = {2052-4463}, support = {R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; R2024032//National Institute of Fisheries Science (NIFS)/ ; }, mesh = {Animals ; *Flounder/genetics ; *Genome ; Genetic Variation ; Republic of Korea ; }, abstract = {The olive flounder, Paralichthys olivaceus, also known as the Korean halibut, is an economically important flatfish in East Asian countries. Here, we provided four fully phased genome assemblies of two different olive flounder individuals using high-fidelity long-read sequencing and their parental short-read sequencing data. We obtained 42-44 Gb of ~15-kb and ~Q30 high-fidelity long reads, and their assembly quality values were ~53. 
We annotated ~30 K genes, ~170-Mb repetitive sequences, and ~3 M 5-methylcytosine positions for each genome assembly, and established a graph-based draft pan-genome of the olive flounder. We identified 5 M single-nucleotide variants and 100 K structural variants with their genotype information, where ~13% of the variants were possibly fixed in the two Korean individuals. Based on our chromosome-level genome assembly, we also explored chromosome evolution in the Pleuronectiformes family, as reported earlier. Our high-quality genomic resources will contribute to future genomic selection for accelerating the breeding process of the olive flounder.}, } @article {pmid39495470, year = {2024}, author = {Zhuo, ZX and Feng, YL and Zhang, XW and Liu, H and Zeng, FY and Li, XY}, title = {Whole-Genome Sequencing Reveals the Population Structure and Genetic Diversity of Salmonella Typhimurium ST34 and ST19 Lineages.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {}, number = {}, pages = {}, pmid = {39495470}, issn = {1976-3794}, support = {YZ2022QN02//President Foundation of The Fifth Affiliated Hospital, Southern Medical University/ ; 2022A1515012481//Basic and Applied Basic Research Foundation of Guangdong Province Natural Science Foundation/ ; }, abstract = {Salmonella Typhimurium is an invasive gastrointestinal pathogen for both humans and animals. To investigate the genetic framework and diversity of S. Typhimurium, a total of 194 S. Typhimurium isolates were collected from patients in a tertiary hospital between 2020 and 2021. Antimicrobial susceptibility testing was used to confirm the resistance phenotype. Whole-genome sequencing and bioinformatics analysis were performed to determine the sequence type, phylogenetic relationships, resistance gene profiles, Salmonella pathogenicity island (SPI) and the diversity of the core and pan genome. The result showed that 57.22% of S. 
Typhimurium isolates were multidrug resistant and resistance of total isolates to the first-line drug ciprofloxacin was identified in 60.82%. The population structure of S. Typhimurium was categorized into three lineages: ST19 (20.10%, 39/194), ST34-1 (47.42%, 92/194) and ST34-2 (32.47%, 63/194), with the population size exhibiting increasing trends. All lineages harbored variety of fimbrial operons, prophages, SPIs and effectors that contributed to the virulence and long-term infections of S. Typhimurium. Importantly, ST34-1 lineage might potentially be more invasive due to the possession of SPI1-effector gene sopE which was essential for the proliferation, internalization and intracellular presence of S. Typhimurium in hosts. Multiple antimicrobial resistance genes were characteristically distributed across three lineages, especially carbapenem genes only detected in ST34-1&2 lineages. The distinct functional categories of pan genome among three lineages were observed in metabolism, signaling and gene information processing. This study provides a theoretical foundation for the evolved adaptation and genetic diversity of S. Typhimurium ST19 and ST34, among which ST34 lineages with multidrug resistance and potential hypervirulence need to pay more attention to epidemiological surveillance.}, } @article {pmid39491772, year = {2024}, author = {Ananna, NT and Shishir, TA and Ahmed, A and Sium, SMA and Shakil, MS and Haque, DFKM and Hasanuzzaman, M}, title = {Characterization of Two Lytic Bacteriophages Infecting Carbapenem-Resistant Clinical Klebsiella pneumoniae in Dhaka, Bangladesh.}, journal = {Virus research}, volume = {}, number = {}, pages = {199491}, doi = {10.1016/j.virusres.2024.199491}, pmid = {39491772}, issn = {1872-7492}, abstract = {Bacteriophages or bacteria infecting viruses are genetically diverse. Due to the emergence of antimicrobial-resistant bacteria, lytic bacteriophages are gaining enormous attention for treating superbug infections.
Klebsiella pneumoniae is one of the eight most significant nosocomial pathogens and is addressed as a critical priority pathogen by WHO, requiring alternative treatment options. We reported two highly lytic bacteriophages, Klebsiella phage Kpn BM7 and the novel Klebsiella phage Kpn BU9, isolated from hospital wastewater and exhibiting lytic activity against different clinical isolates. Whole-genome analysis revealed that phages BM7 and BU9 belong to class Caudoviricetes. Phage BM7, with a genome length of 170,558 bp, is a member of the genus Marfavirus and the species Marfavirus F48, while phage BU9, with a genome length of 60,450 bp, remains unclassified. Neither phage harbors any lysogenic, toxin, or antimicrobial resistance genes. Both phages can steadily survive up to 40°C and at pH 5-7. The optimal MOI was 0.1 for BM7 and 1 for BU9, with short latent periods of 10 and 25 min and burst sizes of 85 PFU/cell and 12 PFU/cell, respectively. This is the first carbapenem-resistant K. pneumoniae (CRKP) targeting lytic phages to be reported from Bangladesh. This study suggests that BM7 and BU9 are potential candidates for targeting carbapenem-resistant K. pneumoniae.}, } @article {pmid39487719, year = {2024}, author = {Paineau, M and Zaccheo, M and Massonnet, M and Cantu, D}, title = {Advances in grape and pathogen genomics toward durable grapevine disease resistance.}, journal = {Journal of experimental botany}, volume = {}, number = {}, pages = {}, doi = {10.1093/jxb/erae450}, pmid = {39487719}, issn = {1460-2431}, abstract = {The future sustainability of viticulture depends on the development of grapevine cultivars with genetic resistance to diseases such as powdery mildew, downy mildew, and Pierce's disease. Recent advances in grape and pathogen genomics have dramatically improved our approach to durable disease resistance. 
The availability of diploid genome references for wild species, combined with the ability to phase resistance haplotypes and conduct genome-wide association and expression analyses, has greatly enhanced our ability to dissect genetic resistance loci. This progress is yielding candidate genes that will form the foundation for precise breeding, gene stacking, and genome editing in grape improvement programs. As resistance genes are deployed in vineyards, pathogen populations evolve to adapt and evade these defenses, posing ongoing challenges. Understanding the adaptive mechanisms of grapevine pathogens in response to resistant cultivars is crucial. Grape pathogenomics is advancing rapidly, marked by the sequencing of many pathogen genomes, the discovery of effectors, including the first ones responsible for disease resistance breakdown, and the development of graph-based pangenomes. These advancements offer valuable insights into pathogen evolution and inform strategies for sustainable disease management. Together, these genomic tools and insights are paving the way for developing resilient grapevine varieties, ensuring the long-term sustainability of viticulture.}, } @article {pmid39487120, year = {2024}, author = {Young, MG and Straub, TJ and Worby, CJ and Metsky, HC and Gnirke, A and Bronson, RA and van Dijk, LR and Desjardins, CA and Matranga, C and Qu, J and Villicana, JB and Azimzadeh, P and Kau, A and Dodson, KW and Schreiber, HL and Manson, AL and Hultgren, SJ and Earl, AM}, title = {Distinct Escherichia coli transcriptional profiles in the guts of recurrent UTI sufferers revealed by pangenome hybrid selection.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {9466}, pmid = {39487120}, issn = {2041-1723}, support = {U19AI110818//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; R01DK121822//U.S. 
Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; R01AI165915//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; R01DK121822//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; }, mesh = {Humans ; *Urinary Tract Infections/microbiology/genetics ; Female ; *Escherichia coli Infections/microbiology/genetics ; *Transcriptome/genetics ; *Escherichia coli/genetics ; *Uropathogenic Escherichia coli/genetics ; *Genome, Bacterial/genetics ; Recurrence ; Feces/microbiology ; Gastrointestinal Microbiome/genetics ; Adult ; Gastrointestinal Tract/microbiology ; }, abstract = {Low-abundance members of microbial communities are difficult to study in their native habitats, including Escherichia coli, a minor but common inhabitant of the gastrointestinal tract, and key opportunistic pathogen of the urinary tract. While multi-omic analyses have detailed interactions between uropathogenic Escherichia coli (UPEC) and the bladder mediating urinary tract infection (UTI), little is known about UPEC in its pre-infection reservoir, the gastrointestinal tract, partly due to its low relative abundance (<1%). To sensitively explore the genomes and transcriptomes of diverse gut E. coli, we develop E. coli PanSelect, which uses probes designed to specifically capture E. coli's broad pangenome. We demonstrate its ability to enrich diverse E. coli by orders of magnitude, in a mock community and in human stool from a study investigating recurrent UTI (rUTI). Comparisons of transcriptomes between gut E. coli of women with and without history of rUTI suggest rUTI gut E. coli are responding to increased oxygen and nitrate, suggestive of mucosal inflammation, which may have implications for recurrent disease. E. coli PanSelect is well suited for investigations of in vivo E. 
coli biology in other low-abundance environments, and the framework described here has broad applicability to other diverse, low-abundance organisms.}, } @article {pmid39485561, year = {2024}, author = {Achudhan, AB and Saleena, LM}, title = {Comparative genomic analysis and characterization of novel high-quality draft genomes from the coal metagenome.}, journal = {World journal of microbiology \& biotechnology}, volume = {40}, number = {12}, pages = {370}, pmid = {39485561}, issn = {1573-0972}, mesh = {*Coal/microbiology ; *Metagenome ; *Phylogeny ; *Bacteria/genetics/classification ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; Genomics/methods ; Metagenomics/methods ; Microbiota/genetics ; DNA, Bacterial/genetics ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Coal, a sedimentary rock harbours a complex microbial community that plays a significant role in its formation and characteristics. However, coal metagenome sequencing and studies were less, limiting our understanding of this complex ecosystem. This study aimed to reconstruct high-quality metagenome-assembled genomes (MAGs) from the coal sample collected in the Neyveli mine to explore the unrevealed diversity of the coal microbiome. Using Illumina sequencing, we obtained high-quality raw reads in FASTQ format. Subsequently, de novo assembly and binning with metaWRAP software facilitated the reconstruction of coal MAGs. Quality assessment using CheckM identified 10 High-Quality MAGs (HQ MAGs), 7 medium-quality MAGs (MQ MAGs), and 6 low-quality MAGs (LQ MAGs). Further analysis using GTDB-Tk revealed four HQ MAGs as known species like Dermacoccus abyssi, Sphingomonas aquatilis, Acinetobacter baumannii, and Burkholderia cenocepacia. The remaining six HQ MAGs were classified as Comamonas, Arthrobacter, Noviherbaspirillum, Acidovorax, Oxalicibacterium, and Bordetella and designated as novel genomes by the validation of digital DNA-DNA hybridization (dDDH).
Phylogenetic analysis and further pangenome analysis across the phylogenetic groups revealed a similar pattern with a high proportion of cloud genes. We further analysed the functional potential of these MAGs and closely related genomes using COG. The comparative functional genomics revealed that novel genomes are highly versatile, potentially reflecting adaptations to the coal environment. BlastKOALA was used to conduct a detailed analysis of the metabolic pathways associated with the MAGs. This study highlights the comparative genomic analysis of novel coal genomes with their closely related genomes to understand the evolutionary relationships and functional properties.}, } @article {pmid39482604, year = {2024}, author = {Kaur, H and Shannon, LM and Samac, DA}, title = {A stepwise guide for pangenome development in crop plants: an alfalfa (Medicago sativa) case study.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {1022}, pmid = {39482604}, issn = {1471-2164}, support = {5026-12210-004-00D//USDA-ARS/ ; 5026-12210-004-00D//USDA-ARS/ ; 5026-12210-004-00D//USDA-ARS/ ; }, mesh = {*Genome, Plant ; *Medicago sativa/genetics ; *Crops, Agricultural/genetics ; Genomics/methods ; Polyploidy ; }, abstract = {BACKGROUND: The concept of pangenomics and the importance of structural variants is gaining recognition within the plant genomics community. Due to advancements in sequencing and computational technology, it has become feasible to sequence the entire genome of numerous individuals of a single species at a reasonable cost. Pangenomes have been constructed for many major diploid crops, including rice, maize, soybean, sorghum, pearl millet, peas, sunflower, grapes, and mustards. However, pangenomes for polyploid species are relatively scarce and are available in only few crops including wheat, cotton, rapeseed, and potatoes.

MAIN BODY: In this review, we explore the various methods used in crop pangenome development, discussing the challenges and implications of these techniques based on insights from published pangenome studies. We offer a systematic guide and discuss the tools available for constructing a pangenome and conducting downstream analyses. Alfalfa, a highly heterozygous, cross pollinated and autotetraploid forage crop species, is used as an example to discuss the concerns and challenges offered by polyploid crop species. We conducted a comparative analysis using linear and graph-based methods by constructing an alfalfa graph pangenome using three publicly available genome assemblies. To illustrate the intricacies captured by pangenome graphs for a complex crop genome, we used five different gene sequences and aligned them against the three graph-based pangenomes. The comparison of the three graph pangenome methods reveals notable variations in the genomic variation captured by each pipeline.

CONCLUSION: Pangenome resources are proving invaluable by offering insights into core and dispensable genes, novel gene discovery, and genome-wide patterns of variation. Developing user-friendly online portals for linear pangenome visualization has made these resources accessible to the broader scientific and breeding community. However, challenges remain with graph-based pangenomes including compatibility with other tools, extraction of sequence for regions of interest, and visualization of genetic variation captured in pangenome graphs. These issues necessitate further refinement of tools and pipelines to effectively address the complexities of polyploid, highly heterozygous, and cross-pollinated species.}, } @article {pmid39476558, year = {2024}, author = {Sabety, J and Svara, A and Tegtmeier, R and Feulner, H and Cho, P and Sakina, A and Hickok, D and Khan, A}, title = {Unlocking diversity from wild relatives of perennial fruit crops in the pan-genomics era.}, journal = {Current opinion in plant biology}, volume = {82}, number = {}, pages = {102652}, doi = {10.1016/j.pbi.2024.102652}, pmid = {39476558}, issn = {1879-0356}, abstract = {Crop wild relatives of perennial fruit crops have a wealth of untapped genetic diversity that can be utilized for cultivar development. However, barriers such as linkage drag, long juvenility, and high heterozygosity have hindered their utilization. Advancements in genome sequencing technologies and assembly methods, combined with the integration of chromosome conformation capture have made it possible to construct high-quality reference genomes. These genome assemblies can be combined into pan-genomes, capturing inter- and intraspecific variations across coding and non-coding regions. Pan-genomes of perennial fruit crops are being developed to identify the genetic basis of traits. 
This will help overcome breeding challenges, enabling faster and more targeted development of new cultivars with novel traits through breeding and biotechnology.}, } @article {pmid39483525, year = {2024}, author = {Contreras-Peruyero, H and Guerrero-Flores, S and Zirión-Martínez, C and Mejía-Ponce, PM and Navarro-Miranda, M and Lovaco-Flores, JA and Ibarra-Rodríguez, JM and Pashkov, A and Licona-Cassani, C and Sélem-Mojica, N}, title = {Meeting the challenge of genomic analysis: a collaboratively developed workshop for pangenomics and topological data analysis.}, journal = {Bioinformatics advances}, volume = {4}, number = {1}, pages = {vbae139}, pmid = {39483525}, issn = {2635-0041}, abstract = {MOTIVATION: As genomics data analysis becomes increasingly intricate, researchers face the challenge of mastering various software tools. The rise of Pangenomics analysis, which examines the complete set of genes in a group of genomes, is particularly transformative in understanding genetic diversity. Our interdisciplinary team of biologists and mathematicians developed a short Pangenomics Workshop covering Bash, Python scripting, Pangenome, and Topological Data Analysis. These skills provide deeper insights into genetic variations and their implications in Evolutionary Biology. The workshop uses a Conda environment for reproducibility and accessibility. Developed in The Carpentries Incubator infrastructure, the workshop aims to equip researchers with essential skills for Pangenomics research. By emphasizing the role of a community of practice, this work underscores its significance in empowering multidisciplinary professionals to collaboratively develop training that adheres to best practices.

RESULTS: Our workshop delivers tangible outcomes by enhancing the skill sets of Computational Biology professionals. Participants gain hands-on experience using real data from the first described pangenome. We share our paths toward creating an open-source, multidisciplinary, and public resource where learners can develop expertise in Pangenomic Analysis. This initiative goes beyond advancing individual capabilities, aligning with the broader mission of addressing educational needs in Computational Biology.

https://carpentries-incubator.github.io/pangenomics-workshop/.}, } @article {pmid39475287, year = {2024}, author = {Maggi, F and Giuliodori, AM and Brandi, A and Cimarelli, L and Alcántara, R and Pallotti, S and Amantini, C and Petrelli, D and Fabbretti, A and Spurio, R and Napolioni, V}, title = {Pangenome analysis of Paenibacillus polymyxa strains reveals the existence of multiple and functionally distinct Paenibacillus species.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0174024}, doi = {10.1128/aem.01740-24}, pmid = {39475287}, issn = {1098-5336}, abstract = {UNLABELLED: Paenibacillus polymyxa, a Gram-positive bacterium commonly found in soil and plant roots, plays an important role in the environment due to its nitrogen-fixing ability and is renowned for producing antibiotics like polymyxin. In this study, we present a robust framework for investigating the evolutionary and taxonomic connections of strains belonging to P. polymyxa available at the National Center for Biotechnology Information, as well as five new additional strains isolated at the University of Camerino (Italy), through pangenome analysis. These strains can produce secondary metabolites active against Staphylococcus aureus and Klebsiella pneumoniae. Employing techniques such as digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI) estimation, OrthoFinder, and ribosomal multilocus sequence typing, we consistently divided these P. polymyxa strains into four clusters, which differ significantly in terms of ANI and dDDH percentages, both considered as reference indices for separating bacterial species. Moreover, the strains of Cluster 2 were re-classified as belonging to the Paenibacillus ottowii species. By comparing the pangenomes, we identified the core genes of each cluster and analyzed them to recognize distinctive features in terms of biosynthetic/metabolic potential. 
The comparison of pangenomes also allowed us to pinpoint differences between clusters in terms of genetic variability and the percentage of the genome dedicated to core and accessory genes. In conclusion, the data obtained from our analyses of strains belonging to the P. polymyxa species converge toward a necessary reclassification, which will require a fundamental contribution from microbiologists in the near future.

IMPORTANCE: The development of sequencing technologies has led to an exponential increase in microbial sequencing data. Accurately identifying bacterial species remains a challenge because of extensive intra-species variability, the need for multiple identification methods, and the rapid rate of taxonomic changes. A substantial contribution to elucidating the relationships among related bacterial strains comes from comparing their genomic sequences. This comparison also allows for the identification of the "pangenome," which is the set of genes shared by all individuals of a species, as well as the set of genes that are unique to subpopulations. Here, we applied this approach to Paenibacillus polymyxa, a species studied for its potential as a biofertilizer and biocontrol agent and known as an antibiotic producer. Our work highlights the need for a more efficient classification of this bacterial species and provides a better delineation of strains with different properties.}, } @article {pmid39472795, year = {2024}, author = {Andriyanov, P and Zhurilov, P and Menshikova, A and Tutrina, A and Yashin, I and Kashina, D}, title = {Large-scale genomic analysis of Elizabethkingia anophelis.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {1015}, pmid = {39472795}, issn = {1471-2164}, mesh = {*Flavobacteriaceae/genetics ; *Genome, Bacterial ; *Phylogeny ; *Genomics/methods ; Gene Transfer, Horizontal ; Humans ; Flavobacteriaceae Infections/microbiology ; Animals ; Drug Resistance, Bacterial/genetics ; }, abstract = {The recent emergence of Elizabethkingia anophelis as a human pathogen is a major concern for global public health. This organism has the potential to cause severe infections and has inherent antimicrobial resistance. The potential for widespread outbreaks and rapid global spread highlights the critical importance of understanding the biology and transmission dynamics of this infectious agent. 
We performed a large-scale analysis of available 540 E. anophelis, including one novel strain isolated from raw milk and sequenced in this study. Pan-genome analysis revealed an open and diverse pan-genome in this species, characterized by the presence of many accessory genes. This suggests that the species has a high level of adaptability and can thrive in a variety of environments. Phylogenetic analysis has also revealed a complex population structure, with limited source-lineage correlation. We identified diverse antimicrobial resistance factors, including core-genome and accessory ones often associated with mobile genetic elements within specific lineages. Mobilome analysis revealed a dynamic landscape primarily composed of genetic islands, integrative and conjugative elements, prophage elements, and small portion of plasmids emphasizing a complex mechanism of horizontal gene transfer. Our study underscores the adaptability of E. anophelis, characterized by a diverse range of antimicrobial resistance genes, putative virulence factors, and genes enhancing fitness. This adaptability is also supported by the organism's ability to acquire genetic material through horizontal gene transfer, primarily facilitated by mobile genetic elements such as integrative and conjugative elements (ICEs). 
The potential for rapid evolution of this emerging pathogen poses a significant challenge to public health efforts.}, } @article {pmid39472693, year = {2024}, author = {Li, J and Liu, Z and You, C and Qi, Z and You, J and Grover, CE and Long, Y and Huang, X and Lu, S and Wang, Y and Zhang, S and Wang, Y and Bai, R and Zhang, M and Jin, S and Nie, X and Wendel, JF and Zhang, X and Wang, M}, title = {Convergence and divergence of diploid and tetraploid cotton genomes.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {39472693}, issn = {1546-1718}, abstract = {Polyploidy is an important driving force in speciation and evolution; however, the genomic basis for parallel selection of a particular trait between polyploids and ancestral diploids remains unexplored. Here we construct graph-based pan-genomes for diploid (A2) and allotetraploid (AD1) cotton species, enabled by an assembly of 50 genomes of genetically diverse accessions. We delineate a mosaic genome map of tetraploid cultivars that illustrates genomic contributions from semi-wild forms into modern cultivars. Pan-genome comparisons identify syntenic and hyper-divergent regions of continued variation between diploid and tetraploid cottons, and suggest an ongoing process of sequence evolution potentially linked to the contrasting genome size change in two subgenomes. We highlight 43% of genetic regulatory relationships for gene expression in diploid encompassing sequence divergence after polyploidy, and specifically characterize six underexplored convergent genetic loci contributing to parallel selection of fiber quality. 
This study offers a framework for pan-genomic dissection of genetic regulatory components underlying parallel selection of desirable traits in organisms.}, } @article {pmid39472552, year = {2024}, author = {Guo, M and Lian, Q and Mei, Y and Yang, W and Zhao, S and Zhang, S and Xing, X and Zhang, H and Gao, K and He, W and Wang, Z and Wang, H and Zhou, J and Cheng, L and Bao, Z and Huang, S and Yan, J and Zhao, X}, title = {Analyzes of pan-genome and resequencing atlas unveil the genetic basis of jujube domestication.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {9320}, pmid = {39472552}, issn = {2041-1723}, mesh = {*Ziziphus/genetics ; *Domestication ; *Genome, Plant ; *Genetic Variation ; Fruit/genetics/anatomy & histology/growth & development ; Genomics/methods ; Polymorphism, Single Nucleotide ; }, abstract = {Jujube (Ziziphus jujuba Mill.), belonging to the Rhamnaceae family, is gaining increasing prominence as a perennial fruit crop with significant economic and medicinal values. Here, we conduct de novo assembly of four reference-grade genomes, encompassing one wild and three cultivated jujube accessions. We present insights into the population structure, genetic diversity, and genomic variations within a diverse collection of 1059 jujube accessions. Analyzes of the jujube pan-genome, based on our four assemblies and four previously released genomes, reveal extensive genomic variations within domestication-associated regions, potentially leading to the discovery of a candidate gene that regulates flowering and fruit ripening. By leveraging the pan-genome and a large-scale resequencing population, we identify two candidate genes involved in domestication traits, including the seed-setting rate, the bearing-shoot length and the leaf size in jujube. 
These genomic resources will accelerate evolutionary and functional genomics studies of jujube.}, } @article {pmid39471242, year = {2024}, author = {Yañez-Olvera, AG and Gómez-Díaz, AG and Sélem-Mojica, N and Rodríguez-Orduña, L and Lara-Ávila, JP and Varni, V and Alcoba, F and Croce, V and Legros, T and Torres, A and Torres Ruíz, A and Tarrats, F and Vermunt, A and Looije, T and Cibrian-Jaramillo, A and Valenzuela, M and Siri, MI and Barona-Gomez, F}, title = {A host shift as the origin of tomato bacterial canker caused by Clavibacter michiganensis.}, journal = {Microbial genomics}, volume = {10}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001309}, pmid = {39471242}, issn = {2057-5858}, mesh = {*Solanum lycopersicum/microbiology ; *Plant Diseases/microbiology ; *Phylogeny ; *Clavibacter/genetics ; Actinobacteria/genetics/classification/isolation & purification ; Genome, Bacterial ; Evolution, Molecular ; }, abstract = {The Actinomycetota (formerly Actinobacteria) genus Clavibacter includes phytopathogens with devastating effects in several crops. Clavibacter michiganensis, the causal agent of tomato bacterial canker, is the most notorious species of the genus. Yet, its origin and natural reservoirs remain elusive, and its populations show pathogenicity profiles with unpredictable plant disease outcomes. Here, we generate and analyse a decade-long genomic dataset of Clavibacter from wild and commercial tomato cultivars, providing evolutionary insights that directed phenotypic characterization. Our phylogeny situates the last common ancestor of C. michiganensis next to Clavibacter isolates from grasses rather than to the sole strain we could isolate from wild tomatoes. Pathogenicity profiling of C. michiganensis isolates, together with C. phaseoli and C. californiensis as sister taxa and the wild tomato strain, was found to be congruent with the proposed phylogenetic relationships. 
We then identified gene enrichment after the evolutionary event, leading to the appearance of the C. michiganensis clade, including known pathogenicity factors but also hitherto unnoticed genes with the ability to encode adaptive traits for a pathogenic lifestyle. The holistic perspective provided by our evolutionary analyses hints towards a host shift event as the origin of C. michiganensis as a tomato pathogen and the existence of pathogenic genes that remain to be characterized.}, } @article {pmid39470715, year = {2024}, author = {Miao, Z and Ren, Y and Tarabini, A and Yang, L and Li, H and Ye, C and Liti, G and Fischer, G and Li, J and Yue, JX}, title = {ScRAPdb: an integrated pan-omics database for the Saccharomyces cerevisiae reference assembly panel.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae955}, pmid = {39470715}, issn = {1362-4962}, support = {32070592//National Natural Science Foundation of China/ ; 2022A1515010717//Guangdong Basic and Applied Basic Research Foundation/ ; 2019QN01Y183//Guangdong Pearl River Talents Program/ ; YTP-SYSUCC-0042//Sun Yat-sen University Cancer Center/ ; 24qnpy293//Fundamental Research Funds for the Central Universities/ ; }, abstract = {As a unicellular eukaryote, the budding yeast Saccharomyces cerevisiae strikes a unique balance between biological complexity and experimental tractability, serving as a long-standing classic model for both basic and applied studies. Recently, S. cerevisiae further emerged as a leading system for studying natural diversity of genome evolution and its associated functional implication at population scales. Having high-quality comparative and functional genomics data are critical for such efforts. Here, we exhaustively expanded the telomere-to-telomere (T2T) S. cerevisiae reference assembly panel (ScRAP) that we previously constructed for 142 strains to cover high-quality genome assemblies and annotations of 264 S. 
cerevisiae strains from diverse geographical and ecological niches and also 33 outgroup strains from all the other Saccharomyces species complex. We created a dedicated online database, ScRAPdb (https://www.evomicslab.org/db/ScRAPdb/), to host this expanded pangenome collection. Furthermore, ScRAPdb also integrates an array of population-scale pan-omics atlases (pantranscriptome, panproteome and panphenome) and extensive data exploration toolkits for intuitive genomics analyses. All curated data and downstream analysis results can be easily downloaded from ScRAPdb. We expect ScRAPdb to become a highly valuable platform for the yeast community and beyond, leading to a pan-omics understanding of the global genetic and phenotypic diversity.}, } @article {pmid39470281, year = {2024}, author = {Junker, S and Singh, V and Al-Saadi, AGM and Wood, NA and Hamilton-Brehm, SD and Ouellette, SP and Fisher, DJ}, title = {Distinct impacts of each anti-anti-sigma factor ortholog of the chlamydial Rsb partner switching mechanism on development in Chlamydia trachomatis.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0184624}, doi = {10.1128/spectrum.01846-24}, pmid = {39470281}, issn = {2165-0497}, abstract = {Partner switching mechanisms (PSMs) are signal transduction systems comprised of a sensor phosphatase (RsbU), an anti-sigma factor (RsbW, kinase), an anti-anti-sigma factor (RsbV, the RsbW substrate), and a target sigma factor. Chlamydia spp. are obligate intracellular bacterial pathogens of animals that undergo a developmental cycle transitioning between the infectious elementary body (EB) and replicative reticulate body (RB) within a host cell-derived vacuole (inclusion). Secondary differentiation events (RB to EB) are transcriptionally regulated, in part, by the housekeeping sigma factor (σ[66]) and two late-gene sigma factors (σ[54] and σ[28]). Prior research supports that the PSM in Chlamydia trachomatis regulates availability of σ[66]. 
Pan-genome analysis revealed that PSM components are conserved across the phylum Chlamydiota, with Chlamydia spp. possessing an atypical arrangement of two anti-anti-sigma factors, RsbV1 and RsbV2. Bioinformatic analyses support RsbV2 as the homolog to the pan-genome-conserved RsbV with RsbV1 as an outlier. This, combined with in vitro data, indicates that RsbV1 and RsbV2 are structurally and biochemically distinct. Reduced levels or overexpression of RsbV1/RsbV2 did not significantly impact C. trachomatis growth or development. In contrast, overexpression of a non-phosphorylatable RsbV2 S55A mutant, but not overexpression of an RsbV1 S56A mutant, resulted in a 3 log reduction in infectious EB production without reduction in genomic DNA (total bacteria) or inclusion size, suggesting a block in secondary differentiation. The block was corroborated by reduced production of σ[54/28]-regulated late proteins and via transmission electron microscopy.

IMPORTANCE: Chlamydia trachomatis is the leading cause of reportable bacterial sexually transmitted infections (STIs) and causes the eye infection trachoma, a neglected tropical disease. Broad-spectrum antibiotics used for treatment can lead to microbiome dysbiosis and increased antibiotic resistance development in other bacteria, and treatment failure for chlamydial STIs is a recognized clinical problem. Here, we show that disruption of a partner switching mechanism (PSM) significantly reduces infectious progeny production via blockage of reticulate body to elementary body differentiation. We also reveal a novel PSM expansion largely restricted to the species infecting animals, suggesting a role in pathogen evolution. 
Collectively, our results highlight the chlamydial PSM as a key regulator of development that could be a potential target for novel therapeutics.}, } @article {pmid39470274, year = {2024}, author = {Sivori, F and Cavallo, I and Truglio, M and Pelagalli, L and Mariani, V and Fabrizio, G and Abril, E and Santino, I and Fradiani, PA and Solmone, M and Pimpinelli, F and Toma, L and Arcioni, R and De Blasi, RA and Di Domenico, EG}, title = {Biofilm-mediated antibiotic tolerance in Staphylococcus aureus from spinal cord stimulation device-related infections.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0168324}, doi = {10.1128/spectrum.01683-24}, pmid = {39470274}, issn = {2165-0497}, abstract = {Staphylococcus aureus is a predominant cause of infections in individuals with spinal cord stimulation (SCS) devices. Biofilm formation complicates these infections, commonly requiring both surgical and antibiotic treatments. This study explored the biofilm matrix composition and antimicrobial susceptibility of planktonic and biofilm-growing S. aureus isolates from individuals with SCS-related infections. Whole-genome sequencing (WGS) examined genotypes, virulome, resistome, and the pan-genome structure. The study also analyzed biofilm matrix composition, early surface adhesion, hemolytic activity, and antibiotic-susceptibility testing. WGS revealed genetic diversity among isolates. One isolate, though oxacillin susceptible, contained the mecA gene. The median number of virulence factor genes per isolate was 58. All isolates harbored the biofilm-related icaA/D genes. When assessing phenotypic characteristics, all strains demonstrated the ability to form biofilms in vitro. The antimicrobial susceptibility profile indicated that oxacillin, rifampin, and teicoplanin showed the highest efficacy against S. aureus biofilm. Conversely, high biofilm tolerance was observed for vancomycin, trimethoprim/sulfamethoxazole, and levofloxacin. 
These findings suggest that S. aureus isolates are highly virulent and produce robust biofilms. In cases of suspected biofilm infections caused by S. aureus, vancomycin should not be the primary choice due to its low activity against biofilm. Instead, oxacillin, rifampin, and teicoplanin appear to be more effective options to manage SCS infections.

IMPORTANCE: SCS devices are increasingly used to manage chronic pain, but infections associated with these devices, particularly those caused by Staphylococcus aureus, present significant clinical challenges. These infections are often complicated by biofilm formation, which protects bacteria from immune responses and antibiotic treatments, making them difficult to eradicate. Understanding the genetic diversity, virulence, and biofilm characteristics of S. aureus isolates from SCS infections is critical to improving treatment strategies. Our study highlights the need to reconsider commonly used antibiotics like vancomycin, which shows reduced activity against biofilm-growing cells. Identifying more effective alternatives, such as oxacillin, rifampin, and teicoplanin, provides valuable insight for clinicians when managing biofilm-related S. aureus infections in patients with SCS implants. 
This research contributes to the growing evidence that biofilm formation is crucial in treating device-related infections, emphasizing the importance of tailoring antimicrobial strategies to the biofilm phenotype.}, } @article {pmid39468479, year = {2024}, author = {van Workum, DM and Mehrem, SL and Snoek, BL and Alderkamp, MC and Lapin, D and Mulder, FFM and Van den Ackerveken, G and de Ridder, D and Schranz, ME and Smit, S}, title = {Lactuca super-pangenome reduces bias towards reference genes in lettuce research.}, journal = {BMC plant biology}, volume = {24}, number = {1}, pages = {1019}, pmid = {39468479}, issn = {1471-2229}, support = {P19-17//Nederlandse Organisatie voor Wetenschappelijk Onderzoek/ ; }, mesh = {*Lactuca/genetics ; *Genome, Plant ; *Genome-Wide Association Study ; *Quantitative Trait Loci ; Polymorphism, Single Nucleotide ; Disease Resistance/genetics ; DNA Copy Number Variations ; Genes, Plant ; Plant Breeding/methods ; Genetic Variation ; }, abstract = {BACKGROUND: Breeding of lettuce (Lactuca sativa L.), the most important leafy vegetable worldwide, for enhanced disease resistance and resilience relies on multiple wild relatives to provide the necessary genetic diversity. In this study, we constructed a super-pangenome based on four Lactuca species (representing the primary, secondary and tertiary gene pools) and comprising 474 accessions. We include 68 newly sequenced accessions to improve cultivar coverage and add important foundational breeding lines.

RESULTS: With the super-pangenome we find substantial presence/absence variation (PAV) and copy-number variation (CNV). Functional enrichment analyses of core and variable genes show that transcriptional regulators are conserved whereas disease resistance genes are variable. PAV-genome-wide association studies (GWAS) and CNV-GWAS are largely congruent with single-nucleotide polymorphism (SNP)-GWAS. Importantly, they also identify several major novel quantitative trait loci (QTL) for resistance against Bremia lactucae in variable regions not present in the reference lettuce genome. The usability of the super-pangenome is demonstrated by identifying the likely origin of non-reference resistance loci from the wild relatives Lactuca serriola, Lactuca saligna and Lactuca virosa.

CONCLUSIONS: The super-pangenome offers a broader view on the gene repertoire of lettuce, revealing relevant loci that are not in the reference genome(s). The provided methodology and data provide a strong basis for research into PAVs, CNVs and other variation underlying important biological traits of lettuce and other crops.}, } @article {pmid39464855, year = {2024}, author = {Shovon, MHJ and Imtiaz, M and Biswas, P and Tareq, MMI and Zilani, MNH and Hasan, MN}, title = {A pan-genomic analysis based multi-epitope vaccine development by targeting Stenotrophomonas maltophilia using reverse vaccinology method: an in-silico approach.}, journal = {In silico pharmacology}, volume = {12}, number = {2}, pages = {93}, pmid = {39464855}, issn = {2193-9616}, abstract = {Antibiotic resistance in bacteria leads to high mortality rates and healthcare costs, a significant concern for public health. A colonizer of the human respiratory system, Stenotrophomonas maltophilia is frequently associated with hospital-acquired infections in individuals with cystic fibrosis, cancer, and other chronic illnesses. The importance of this study is underscored by its capacity to meet the critical demand for effective preventive strategies against this pathogen, particularly among susceptible groups of cystic fibrosis and those undergoing cancer treatment. In this study, we engineered a multi-epitope vaccine targeting S. maltophilia through genomic analysis, reverse vaccination strategies, and immunoinformatic techniques by examining a total of 81 complete genomes of S. maltophilia strains. Our investigation revealed 1945 core protein-coding genes alongside their corresponding proteomic sequences, with 191 of these genes predicted to exhibit virulence characteristics. Out of the filtered proteins, three best antigenic proteins were selected for epitope prediction while seven epitopes each from CTL, HTL, and B cell were chosen for vaccine development. 
The vaccine was refined and validated, showing highly antigenic and desirable physicochemical features. Molecular docking assessments revealed stable binding with TLR-4. Molecular dynamic simulation demonstrated stable dynamics with minor alterations. The originality of this investigation is rooted in the thorough techniques aimed at designing a vaccine that directly targets S. maltophilia, a microorganism of considerable clinical relevance that currently lacks an available vaccine. This study not only responds to a pressing public health crisis but also lays the groundwork for subsequent research endeavors focused on the prevention of S. maltophilia outbreaks. Further evidence from studies in mice models is needed to confirm immune protection against S. maltophilia.}, } @article {pmid39464141, year = {2024}, author = {Novak, AM and Chung, D and Hickey, G and Djebali, S and Yokoyama, TT and Garrison, E and Narzisi, G and Paten, B and Monlong, J}, title = {Efficient indexing and querying of annotations in a pangenome graph.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.10.12.618009}, pmid = {39464141}, issn = {2692-8205}, abstract = {The current reference genome is the backbone of diverse and rich annotations. Simple text formats, like VCF or BED, have been widely adopted and helped the critical exchange of genomic information. There is a dire need for tools and formats enabling pangenomic annotation to facilitate such enrichment of pangenomic references. The Graph Alignment Format (GAF) is a text format, tab-delimited like BED/VCF files, which was proposed to represent alignments. GAF could also be used to store paths representing annotations in a pangenome graph, but there are no tools to index and query them efficiently. Here, we present extensions to vg and HTSlib that provide efficient sorting, indexing, and querying for GAF files. 
With this approach, annotations overlapping a subgraph can be extracted quickly. Paths are sorted based on the IDs of traversed nodes, compressed with BGZIP, and indexed with HTSlib/tabix via our extensions for the GAF format. Compared to the binary GAM format, GAF files are easier to edit or inspect because they are plain text, and we show that they are twice as fast to sort and half as large on disk. In addition, we updated vg annotate, which takes BED or GFF3 annotation files relative to linear sequences and projects them into the pangenome. It can now produce GAF files representing these annotations' paths through the pangenome. We showcase these new tools on several applications. We projected annotations for all Human Pangenome Reference Consortium Year 1 haplotypes, including genes, segmental duplications, tandem repeats and repeats annotations, into the Minigraph-Cactus pangenome (GRCh38-based v1.1). We also projected known variants from the GWAS Catalog and expression QTLs from the GTEx project into the pangenome. Finally, we reanalyzed ATAC-seq data from ENCODE to demonstrate what a coverage track could look like in a pangenome graph. 
These rich annotations can be quickly queried with vg and visualized using existing tools like the Sequence Tube Map or Bandage.}, } @article {pmid39460918, year = {2025}, author = {Dahodwala, H and Sharfstein, ST}, title = {The 'Omics Revolution in CHO Biology: Roadmap to Improved CHO Productivity.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2853}, number = {}, pages = {119-137}, pmid = {39460918}, issn = {1940-6029}, mesh = {CHO Cells ; Animals ; *Cricetulus ; *Proteomics/methods ; *Recombinant Proteins/genetics/metabolism ; Genomics/methods ; Transcriptome ; Cricetinae ; }, abstract = {Chinese hamster ovary (CHO) cell physiology understanding has advanced very rapidly in the past few years with incredible improvements in long-read sequencing, improved resolution, and increased computational power. Multiple parental lines have been sequenced and the resultant pan-genome can be leveraged to increase our understanding of the diverse pathways CHO cells can take to get high-productivity phenotypes. The same improvements in workflows have complemented transcriptomic studies. Microfluidics and label-free innovations have further increased the sensitivity and accuracy of proteomic methods, while also making proteomics more accessible. In this 'omics era, high-throughput screening methods, sophisticated informatic tools, and models continually drive major innovations in cell line development and process engineering. 
This review describes the various recent achievements in 'omics techniques and their application to improve recombinant protein expression from CHO cell lines.}, } @article {pmid39460811, year = {2024}, author = {Hasan, A and Ibrahim, M and Alonazi, WB and Yu, R and Li, B}, title = {Pangenome analysis of five representative Tropheryma whipplei strains following multiepitope-based vaccine design via immunoinformatic approaches.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {101}, pmid = {39460811}, issn = {1617-4623}, mesh = {Humans ; *Tropheryma/genetics/immunology ; *Bacterial Vaccines/immunology/genetics ; Whipple Disease/immunology/microbiology/genetics ; Computational Biology/methods ; Bacterial Proteins/genetics/immunology ; Genome, Bacterial ; Epitopes/immunology/genetics ; Vaccine Development ; Immunodominant Epitopes/immunology/genetics ; Proteomics/methods ; Proteome/genetics/immunology ; }, abstract = {Whipple disease caused by Tropheryma whipplei a gram-positive bacterium is a systemic disorder that impacts not only the gastrointestinal tract but also the vascular system, joints, central nervous system, and cardiovascular system. Due to the lack of an approved vaccine, this study aimed to utilize immunoinformatic approaches to design multiepitope-based vaccine by utilizing the proteomes of five representative T. whipplei strains. The genomes initially comprised a total of 4,844 proteins ranging from 956 to 1012 proteins per strain. We collected 829 nonredundant lists of core proteins, that were shared among all the strains. Following subtractive proteomics, one extracellular protein, WP_033800108.1, a WhiB family transcriptional regulator, was selected for the chimeric-based multiepitope vaccine. Five immunodominant epitopes were retrieved from the WhiB family transcriptional regulator protein, indicating MHC-I and MHC-II with a global population coverage of 70.61%. 
The strong binding affinity, high solubility, nontoxicity, nonallergenic properties and high antigenicity scores make the selected epitopes more appropriate. Integration of the epitopes into a chimeric vaccine was carried out by applying appropriate adjuvant molecules and linkers, leading to the vaccine construct having enhanced immunogenicity and successfully eliciting both innate and adaptive immune responses. Moreover, the ability of the vaccine to bind TLR4, a core innate immune receptor, was confirmed. Molecular dynamics simulations have also revealed the promising potential stability of the designed vaccine at 400 ns. In summary, we have designed a potential vaccine construct that has the ability not only to induce targeted immunogenicity for one strain but also for global T. whipplei strains. This study proposes a potential universal vaccine, reducing Whipple's disease risk and laying the groundwork for future research on multi-strain pathogens.}, } @article {pmid39460626, year = {2024}, author = {Jespersen, MG and Hayes, AJ and Tong, SYC and Davies, MR}, title = {Insertion sequence elements and unique symmetrical genomic regions mediate chromosomal inversions in Streptococcus pyogenes.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae948}, pmid = {39460626}, issn = {1362-4962}, support = {//The University of Melbourne/ ; }, abstract = {Chromosomal inversions are a phenomenon in many bacterial species, often across the axis of replication. Inversions have been shown to alter gene expression, changing persistence of colonisation and infection following environmental stresses. In Streptococcus pyogenes, inversions have been reported. However, frequency and molecular markers of inversions have not been systematically examined. Here, 249 complete S. pyogenes genomes were analysed using a pangenomic core gene synteny framework to identify sequences associated with inversions. 
47% of genomes (118/249) contained at least one inversion, from 23 unique inversion locations. Chromosomal locations enabling inversions were usually associated with mobile elements (insertion sequences n = 9 and prophages n = 7). Two insertion sequences, IS1548 and IS1239, accounted for >80% of insertion sequences and were the only insertion sequences associated with inversions. The most observed inversion location (n = 104 genomes, 88% of genomes with an inversion) occurs between two conserved regions encoding rRNAs, tRNAs and sigma factor genes. The regions are symmetrically placed around the origin of replication forming a unique chromosomal structure in S. pyogenes, relative to other streptococci. Cataloging of the chromosomal location and frequency of inversions can direct dissection of phenotypic changes following chromosomal inversions. The framework used here can be transferred to other bacterial species to characterise chromosomal inversions.}, } @article {pmid39460615, year = {2024}, author = {Wen, J and Zhang, H and Chu, D and Chen, X and Feng, J and Wang, Y and Liu, G and Zhang, Y and Li, Y and Ning, K}, title = {Deep learning revealed the distribution and evolution patterns for invertible promoters across bacterial lineages.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae966}, pmid = {39460615}, issn = {1362-4962}, support = {32071465//National Natural Science Foundation of China/ ; 2023YFA1800900//National Key Research and Development Program of China/ ; }, abstract = {Invertible promoters (invertons) are crucial regulatory elements in bacteria, facilitating gene expression changes under stress. Despite their importance, their prevalence and the range of regulated gene functions are largely unknown. We introduced DeepInverton, a deep learning model that identifies invertons across a broad phylogenetic spectrum without using sequencing reads. 
By analyzing 68 733 bacterial genomes and 9382 metagenomes, we have uncovered over 200 000 nonredundant invertons and have also highlighted their abundance in pathogens. Additionally, we identified a post-Cambrian Explosion increase of invertons, paralleling species diversification. Furthermore, we revealed that invertons regulate diverse functions, including antimicrobial resistance and biofilm formation, underscoring their role in environmental adaptation. Notably, the majority of inverton identifications by DeepInverton have been confirmed by the in vitro experiments. The comprehensive inverton profiles have deepened our understanding of invertons at pan-genome and pan-metagenome scales, enabling a broad spectrum of applications in microbial ecology and synthetic biology.}, } @article {pmid39459622, year = {2024}, author = {Lu, Y and Huang, J and Liu, D and Kong, X and Song, Y and Jing, L}, title = {Pangenome Data Analysis Reveals Characteristics of Resistance Gene Analogs Associated with Sclerotinia sclerotiorum Resistance in Sunflower.}, journal = {Life (Basel, Switzerland)}, volume = {14}, number = {10}, pages = {}, doi = {10.3390/life14101322}, pmid = {39459622}, issn = {2075-1729}, support = {32060598//National Natural Science Foundation of China/ ; 32160642//National Natural Science Foundation of China/ ; NMGIRT2320//the Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; }, abstract = {The sunflower, an important oilseed crop and food source across the world, is susceptible to several pathogens, which cause severe losses in sunflower production. The utilization of genetic resistance is the most economical, effective measure to prevent infectious diseases. Based on the sunflower pangenome, in this study, we explored the variability of resistance gene analogs (RGAs) within the species. 
According to a comparative analysis of RGA candidates in the sunflower pangenome using the RGAugury pipeline, a total of 1344 RGAs were identified, comprising 1107 conserved, 199 varied, and 38 rare RGAs. We also identified RGAs associated with resistance against Sclerotinia sclerotiorum (S. sclerotiorum) in sunflower at the quantitative trait locus (QTL). A total of 61 RGAs were found to be located at four quantitative trait loci (QTLs). Through a detailed expression analysis of RGAs in one susceptible and two tolerant sunflower inbred lines (ILs) across various time points post inoculation, we discovered that 348 RGAs exhibited differential expression in response to Sclerotinia head rot (SHR), with 17 of these differentially expressed RGAs being situated within the QTL regions. In addition, 15 RGA candidates had gene introgression. Our data provide a better understanding of RGAs, which facilitate genomics-based improvements in disease resistance in sunflower.}, } @article {pmid39456781, year = {2024}, author = {Ríos-Fernández, P and Caicedo-Montoya, C and Ríos-Estepa, R}, title = {Genomic Diversity of Streptomyces clavuligerus: Implications for Clavulanic Acid Biosynthesis and Industrial Hyperproduction.}, journal = {International journal of molecular sciences}, volume = {25}, number = {20}, pages = {}, doi = {10.3390/ijms252010992}, pmid = {39456781}, issn = {1422-0067}, support = {No. 933-2023//MINISTERIO DE CIENCIA Y TECNOLOGIA E INNOVACION - MINCIENCIAS COLOMBIA/ ; }, mesh = {*Streptomyces/genetics/metabolism ; *Clavulanic Acid/biosynthesis ; *Genome, Bacterial ; *Phylogeny ; Genetic Variation ; Genomics/methods ; Plasmids/genetics ; }, abstract = {Streptomyces clavuligerus is a species used worldwide to industrially produce clavulanic acid (CA), a molecule that enhances antibiotic effectiveness against β-lactamase-producing bacterial strains. Despite its low inherent CA production, hyper-producing strains have been developed. 
However, genomic analyses specific to S. clavuligerus and CA biosynthesis are limited. Genomic variations that may influence CA yield were explored using S. clavuligerus strain genomes from diverse sources. Despite the slight differences obtained by similarity index calculation, pan-genome estimation revealed that only half of the genes identified were present in all strains. As expected, core genes were associated with primary metabolism, while the remaining genes were linked to secondary metabolism. Differences at the sequence level were more likely to be found in regions close to the tips of the linear chromosome. Wild-type strains preserved larger chromosomal and plasmid regions compared to industrial and/or hyper-producing strains; such a grouping pattern was also found through refined phylogenetic analyses. These results provide essential insights for the development of hyper-producing S. clavuligerus strains, attending to the critical demand for this antibiotic enhancer and contributing to future strategies for CA production optimization.}, } @article {pmid39455800, year = {2024}, author = {Behera, S and Catreux, S and Rossi, M and Truong, S and Huang, Z and Ruehle, M and Visvanath, A and Parnaby, G and Roddey, C and Onuchic, V and Finocchio, A and Cameron, DL and English, A and Mehtalia, S and Han, J and Mehio, R and Sedlazeck, FJ}, title = {Comprehensive genome analysis and variant detection at scale using DRAGEN.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {39455800}, issn = {1546-1696}, abstract = {Research and medical genomics require comprehensive, scalable methods for the discovery of novel disease targets, evolutionary drivers and genetic markers with clinical significance. This necessitates a framework to identify all types of variants independent of their size or location. 
Here we present DRAGEN, which uses multigenome mapping with pangenome references, hardware acceleration and machine learning-based variant detection to provide insights into individual genomes, with ~30 min of computation time from raw reads to variant detection. DRAGEN outperforms current state-of-the-art methods in speed and accuracy across all variant types (single-nucleotide variations, insertions or deletions, short tandem repeats, structural variations and copy number variations) and incorporates specialized methods for analysis of medically relevant genes. We demonstrate the performance of DRAGEN across 3,202 whole-genome sequencing datasets by generating fully genotyped multisample variant call format files and demonstrate its scalability, accuracy and innovation to further advance the integration of comprehensive genomics. Overall, DRAGEN marks a major milestone in sequencing data analysis and will provide insights across various diseases, including Mendelian and rare diseases, with a highly comprehensive and scalable platform.}, } @article {pmid39453979, year = {2024}, author = {Skiadas, P and Vidal, SR and Dommisse, J and Mendel, MN and Elberse, J and Van den Ackerveken, G and de Jonge, R and Seidl, MF}, title = {Pangenome graph analysis reveals extensive effector copy-number variation in spinach downy mildew.}, journal = {PLoS genetics}, volume = {20}, number = {10}, pages = {e1011452}, doi = {10.1371/journal.pgen.1011452}, pmid = {39453979}, issn = {1553-7404}, abstract = {Plant pathogens adapt at speeds that challenge contemporary disease management strategies like the deployment of disease resistance genes. The strong evolutionary pressure to adapt, shapes pathogens' genomes, and comparative genomics has been instrumental in characterizing this process. 
With the aim to capture genomic variation at high resolution and study the processes contributing to adaptation, we here leverage an innovative, multi-genome method to construct and annotate the first pangenome graph of an oomycete plant pathogen. We expand on this approach by analysing the graph and creating synteny based single-copy orthogroups for all genes. We generated telomere-to-telomere genome assemblies of six genetically diverse isolates of the oomycete pathogen Peronospora effusa, the economically most important disease in cultivated spinach worldwide. The pangenome graph demonstrates that P. effusa genomes are highly conserved, both in chromosomal structure and gene content, and revealed the continued activity of transposable elements which are directly responsible for 80% of the observed variation between the isolates. While most genes are generally conserved, virulence related genes are highly variable between the isolates. Most of the variation is found in large gene clusters resulting from extensive copy-number expansion. Pangenome graph-based discovery can thus be effectively used to capture genomic variation at exceptional resolution, thereby providing a framework to study the biology and evolution of plant pathogens.}, } @article {pmid39452755, year = {2024}, author = {Sabin, SJ and Beesley, CA and Marston, CK and Paisie, TK and Gulvik, CA and Sprenger, GA and Gee, JE and Traxler, RM and Bell, ME and McQuiston, JR and Weiner, ZP}, title = {Investigating Anthrax-Associated Virulence Genes among Archival and Contemporary Bacillus cereus Group Genomes.}, journal = {Pathogens (Basel, Switzerland)}, volume = {13}, number = {10}, pages = {}, doi = {10.3390/pathogens13100884}, pmid = {39452755}, issn = {2076-0817}, abstract = {Bacillus anthracis causes anthrax through virulence factors encoded on two plasmids. However, non-B. 
anthracis organisms within the closely related, environmentally ubiquitous Bacillus cereus group (BCG) may cause an anthrax-like disease in humans through the partial adoption of anthrax-associated virulence genes, challenging the definition of anthrax disease. To elucidate these phenomena and their evolutionary past, we performed whole-genome sequencing on non-anthracis BCG isolates, including 93 archival (1967-2003) and 5 contemporary isolates (2019-2023). We produced annotated genomic assemblies and performed a pan-genome analysis to identify evidence of virulence gene homology and virulence gene acquisition by linear inheritance or horizontal gene transfer. At least one anthrax-associated virulence gene was annotated in ten isolates. Most homologous sequences in archival isolates showed evidence of pseudogenization and subsequent gene loss. The presence or absence of accessory genes, including anthrax-associated virulence genes, aligned with the phylogenetic structure of the BCG core genome. 
These findings support the hypothesis that anthrax-associated virulence genes were inherited from a common ancestor in the BCG and were retained or lost across different lineages, and contribute to a growing body of work informing public health strategies related to anthrax surveillance and identification.}, } @article {pmid39447662, year = {2024}, author = {Qin, H and Ren, J and Zeng, D and Jin, R and Deng, Q and Su, L and Luo, Z and Jiang, J and Wang, P}, title = {Using Reverse Vaccinology Techniques Combined with B-cell Epitope Prediction to Screen Potential Antigenic Proteins of the Bovine pathogen Clostridium Perfringens Type A.}, journal = {Microbial pathogenesis}, volume = {}, number = {}, pages = {107049}, doi = {10.1016/j.micpath.2024.107049}, pmid = {39447662}, issn = {1096-1208}, abstract = {Clostridium perfringens type A frequently causes necrohaemorrhagic enteritis in cattle, a rapidly progressing disease with a high mortality rate, thus inflicting substantial economic losses in the cattle industry. Effective prevention and control of this disease rely on rapid detection and vaccination strategies, making the screening of antigenic proteins with diagnostic and vaccine potential particularly crucial. In this study, we conducted a pangenomic analysis of 15 bacterial strains, grounded in traditional reverse vaccinology and supplemented with B-cell linear and conformational epitope analysis tools. 
This approach led to the identification of 2,304 core genes and 3,606 accessory genes, among which 58 surface-exposed proteins, encoded by core genes, were identified. Proteins lacking tertiary structure information were predicted via AlphaFold2, ultimately identifying four target proteins and 14 candidate proteins enriched with linear and conformational epitopes, including virulence proteins such as alpha-toxin, theta-toxin, and alpha-clostripain, and extracellular solute-binding proteins, rhodanese-like proteins, and the accessory gene-encoded lysozyme inhibitor LprI family protein. Our findings demonstrate that the combined use of multiple B-cell epitope analysis tools can help overcome the limitations of any single tool. The proteins selected in this study offer valuable references for rapid diagnostics and the development of genetically engineered vaccines.}, } @article {pmid39446252, year = {2024}, author = {Guo, L and Li, S and Cheng, D and Lu, X and Gao, X and Zhang, L and Lu, J}, title = {Integrated proteome and pangenome analysis revealed the variation of microalga Isochrysis galbana and associated bacterial community to 2,6-Di-tert-butyl-p-cresol (BHT) stress.}, journal = {World journal of microbiology \& biotechnology}, volume = {40}, number = {11}, pages = {364}, pmid = {39446252}, issn = {1573-0972}, support = {tsqn202211157//Taishan scholars Program of Shandong Province/ ; 2023HWYQ-077//Shandong Excellent Youth Science Fund Project (Overseas)/ ; }, mesh = {*Microalgae/metabolism/genetics ; *Cresols/metabolism ; *Proteome ; *Bacteria/genetics/metabolism/classification ; *Stress, Physiological ; Haptophyta/metabolism/genetics ; Proteomics ; Photosynthesis ; Antioxidants/metabolism ; Biomass ; Chlorophyll/metabolism ; }, abstract = {The phenolic antioxidant 2,6-Di-tert-butyl-p-cresol (BHT) has been detected in various environments and is considered a potential threat to aquatic organisms. 
Algal-bacterial interactions are crucial for maintaining ecosystem balance and elemental cycling, but their response to BHT remains to be investigated. This study analyzed the physiological and biochemical responses of the microalga Isochrysis galbana and the changes of associated bacterial communities under different concentrations of BHT stress. Results showed that the biomass of I. galbana exhibited a decreasing trend with increasing BHT concentrations up to 40 mg/L. The reduction in chlorophyll, carotenoid, and soluble protein content of microalgal cells was also observed under BHT stress. The production of malondialdehyde and the activities of superoxide dismutase, peroxidase, and catalase were further determined. Scanning electron microscopy analysis revealed that BHT caused surface rupture of the algal cells and loss of intracellular nutrients. Proteomic analysis demonstrated the upregulation of photosynthesis and citric acid cycle pathways as a response to BHT stress. Additionally, BHT significantly increased the relative abundance of specific bacteria in the phycosphere, including Marivita, Halomonas, Marinobacter, and Alteromonas. Further experiments confirmed that these bacteria had the ability to utilize BHT as the sole carbon resource for growth, and genes related to the degradation of phenolic compounds were detected through pangenome analysis.}, } @article {pmid39445804, year = {2024}, author = {Wang, C and Wang, C and Liu, Y and Yue, Y and Lu, X and Wang, H and Ying, Y and Chen, J}, title = {Targeted discovery of polyketides with antioxidant activity through integrated omics and cocultivation strategies.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0160324}, doi = {10.1128/aem.01603-24}, pmid = {39445804}, issn = {1098-5336}, abstract = {Fungi generate a diverse array of bioactive compounds with significant pharmaceutical applications. 
However, the chemical diversity of natural products in fungi remains largely unexplored. Here, we present a paradigm for specifically discovering diverse and bioactive compounds from fungi by integrating genome mining with building block molecular network and coculture analysis. Through pangenome and sequence similarity network analysis, we identified a rare type I polyketide enzyme from Penicillium sp. ZJUT-34. Subsequent building block molecular network and coculture strategy led to the identification and isolation of a pair of novel polyketides, (±)-peniphenone E [(±)-1], three known polyketides (2-4), and three precursor compounds (5-7) from a combined culture of Penicillium sp. ZJUT-34 and Penicillium sp. ZJUT23. Their structures were established through extensive spectroscopic analysis, including NMR and HRESIMS. Chiral HPLC separation of compound 1 yielded a pair of enantiomers (+)-1 and (-)-1, with their absolute configurations determined using calculated ECD methods. Compound (±)-1 is notable for its unprecedented structure, featuring a unique 2-methyl-hexenyl-3-one moiety fused with a polyketide clavatol core. We proposed a hypothetical biosynthetic pathway for (±)-1. Furthermore, compounds 2, 5, and 6 exhibited strong antioxidant activity, whereas (-)-1, (+)-1, 3, and 4 exhibited moderate antioxidant activity compared to the positive control, ascorbic acid. Our research demonstrates a pioneering strategy for uncovering novel polyketides by merging genome mining, metabolomics, and cocultivation methods. This approach addresses the challenge of discovering natural compounds produced by rare biosynthetic enzymes that are often silent under conventional conditions due to gene regulation. IMPORTANCE: Polyketides, particularly those with complex structures, are crucial in drug development and synthesis. This study introduces a novel approach to discover new polyketides by integrating genomics, metabolomics, and cocultivation strategies. 
By combining genome mining, building block molecular networks, and coculturing techniques, we identified and isolated a unique polyketide, (±)-peniphenone E, along with three known polyketides and three precursor compounds from Penicillium sp. ZJUT-34 and Penicillium sp. ZJUT23. This approach highlights the potential of using combined strategies to explore fungal chemical diversity and discover novel bioactive compounds. The successful identification of (±)-peniphenone E, with its distinctive structure, demonstrates the effectiveness of this integrated method in enhancing natural product discovery and underscores the value of innovative approaches in natural product research.}, } @article {pmid39441363, year = {2024}, author = {Li, M and Hu, X and Ni, T and Ni, Y and Xue, D and Li, F}, title = {Comparative genomic analyses of the genus Robertmurraya and proposal of the novel species Robertmurraya mangrovi sp. nov., isolated from mangrove soil.}, journal = {Antonie van Leeuwenhoek}, volume = {118}, number = {1}, pages = {22}, pmid = {39441363}, issn = {1572-9699}, support = {2022rczd013//Project of High-Level Talents in AHUCM/ ; 2024rcyb008//Project of High-Level Talents in AHUCM/ ; 2023AH050792//Natural Science Research Project of Anhui Educational Committee/ ; 2024AH051019//Natural Science Research Project of Anhui Educational Committee/ ; }, mesh = {*Soil Microbiology ; *Phylogeny ; *RNA, Ribosomal, 16S/genetics ; *Base Composition ; *Genome, Bacterial ; *DNA, Bacterial/genetics ; Wetlands ; Bacterial Typing Techniques ; China ; Genomics ; Fatty Acids/analysis ; Nucleic Acid Hybridization ; Sequence Analysis, DNA ; }, abstract = {A Gram-positive, aerobic, motile, rod-shaped bacterial strain, designated 31A1R[T], was isolated from the mangrove soil of Xilian village, Zhanjiang, China. Strain 31A1R[T] thrives at temperatures ranging from 15 to 45 °C (optimum at 30 °C), pH 6.5-10 (optimum at 8.5), and in the presence of 0-5% (w/v) NaCl (optimum at 1.5%). 
The strain shares the highest 16S rRNA gene sequence similarity with Robertmurraya crescens (97.24%) and Robertmurraya dakarensis (97.18%). The complete genome of strain 31A1R[T] spans 4.71 Mbp with a genomic DNA G + C content of 35.9 mol%. The average nucleotide identity and DNA-DNA hybridization values between strain 31A1R[T] and type strains of other species of the genus Robertmurraya were 71.24-72.11% and 19.90-21.40%, respectively. The amino acid identity values and percentage of conserved proteins ranged from 66.94 to 68.10% and from 58.34 to 61.62%, respectively, aligning with intrageneric cutoff values. The major fatty acids (≥ 5.0%) were iso-C14:0 (5.0%), iso-C15:0 (41.4%), iso-C16:0 (12.6%), C16:1ω7c alcohol (12.2%), and iso-C17:1 ω10c (6.5%). The polar lipids profile was mainly composed of diphosphatidyl glycerol, phosphatidyl glycerol, and phosphatidyl ethanolamine. We also profiled the pan-genome and metabolic features of genomic assemblies of strains belonging to the genus Robertmurraya, which indicated functional capacities and metabolic similarities. Consequently, we propose that strain 31A1R[T] represents a new species in the genus Robertmurraya, for which the name Robertmurraya mangrovi sp. nov. is proposed, with the type strain being 31A1R[T] (= GDMCC 1.4378[T] = JCM 36937[T]).}, } @article {pmid39438795, year = {2024}, author = {Abdelsalam, NA and ElBanna, SA and Mouftah, SF and Cobo-Díaz, JF and Shata, AH and Shawky, SM and Atteya, R and Elhadidy, M}, title = {Genomic dynamics of high-risk carbapenem-resistant Klebsiella pneumoniae clones carrying hypervirulence determinants in Egyptian clinical settings.}, journal = {BMC infectious diseases}, volume = {24}, number = {1}, pages = {1193}, pmid = {39438795}, issn = {1471-2334}, abstract = {BACKGROUND: Ongoing studies have revealed the global prevalence of severe infections caused by the hypervirulent strains of Klebsiella pneumoniae (K. pneumoniae). 
Meanwhile, the World Health Organization and the Centers for Disease Control declared carbapenem-resistant K. pneumoniae as an urgent public health threat, requiring swift and effective action to mitigate its spread. Low- and middle-income countries are severely impacted by such devastating infectious diseases owing to the ill implementation of antimicrobial practices and infection control policies. Having both hypervirulence and carbapenemase gene determinants, the emergence of convergent hypervirulent carbapenem-resistant K. pneumoniae is now being reported worldwide.

METHODS: In this study, we sequenced 19 carbapenemase-producing K. pneumoniae strains recovered from various clinical specimens. Additionally, we evaluated the phenotypic antimicrobial susceptibility to multiple antimicrobial classes using the VITEK2 automated system. Utilizing the sequencing data, we characterized the sequence types, serotypes, pangenome, resistance profiles, virulence profiles, and mobile genetic elements of the examined isolates. We highlighted the emergence of high-risk clones carrying hypervirulence genetic determinants among the screened isolates.

RESULTS: Our findings revealed that all carbapenem-resistant isolates exhibited either extensive- or pan-drug resistance and harbored multiple variants of resistance genes spanning nearly all the antimicrobial classes. The most prevalent carbapenemase genes detected within the isolates were blaNDM-5 and blaOXA-48. We identified high-risk clones, such as ST383-K30, ST147-K64, ST11-K15, and ST14-K2, which may have evolved into putative convergent strains by acquiring the full set of hypervirulence-associated genetic determinants (iucABCD, rmpA and/ or rmpA2, putative transporter peg-344). Additionally, this study identified ST709-K9 as a high-risk clone for the first time and uncovered that capsule types K15 and K9 carried hypervirulence genetic determinants. The most frequent Inc types found in these isolates were Col440I, IncHI1B, and Inc FII(K).

CONCLUSION: This study highlights the emergence of high-risk, extensively carbapenem-resistant K. pneumoniae strains co-carrying hypervirulence determinants in Egyptian clinical settings. This poses an imminent threat not only to Egypt but also to the global community, underscoring the urgent need for enhanced surveillance and control strategies to combat this pathogen.}, } @article {pmid39436044, year = {2024}, author = {Janssen, AB and Gibson, PS and Bravo, AM and de Bakker, V and Slager, J and Veening, JW}, title = {PneumoBrowse 2: an integrated visual platform for curated genome annotation and multiomics data analysis of Streptococcus pneumoniae.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae923}, pmid = {39436044}, issn = {1362-4962}, support = {TMPFP3_210 202/SNSF_/Swiss National Science Foundation/Switzerland ; //Faculty of Biology and Medicine of the University of Lausanne/ ; }, abstract = {Streptococcus pneumoniae is an opportunistic human pathogen responsible for high morbidity and mortality rates. Extensive genome sequencing revealed its large pangenome, serotype diversity, and provided insight into genome dynamics. However, functional genome analysis has lagged behind, as that requires detailed and time-consuming manual curation of genome annotations and integration of genomic and phenotypic data. To remedy this, PneumoBrowse was presented in 2018, a user-friendly interactive online platform, which provided the detailed annotation of the S. pneumoniae D39V genome, alongside transcriptomic data. Since 2018, many new studies on S. pneumoniae genome biology and protein functioning have been performed. Here, we present PneumoBrowse 2 (https://veeninglab.com/pneumobrowse), fully rebuilt in JBrowse 2. We updated annotations for transcribed and transcriptional regulatory features in the D39V genome. 
We added genome-wide data tracks for high-resolution chromosome conformation capture (Hi-C) data, chromatin immunoprecipitation coupled to high-throughput sequencing (ChIP-Seq), ribosome profiling, CRISPRi-seq gene essentiality data and more. Additionally, we included 18 phylogenetically diverse S. pneumoniae genomes and their annotations. By providing easy access to diverse high-quality genome annotations and links to other databases (including UniProt and AlphaFold), PneumoBrowse 2 will further accelerate research and development into preventive and treatment strategies, through increased understanding of the pneumococcal genome.}, } @article {pmid39435573, year = {2024}, author = {MacPhillamy, C and Chen, T and Hiendleder, S and Williams, JL and Alinejad-Rokny, H and Low, WY}, title = {DNA methylation analysis to differentiate reference, breed, and parent-of-origin effects in the bovine pangenome era.}, journal = {GigaScience}, volume = {13}, number = {}, pages = {}, doi = {10.1093/gigascience/giae061}, pmid = {39435573}, issn = {2047-217X}, support = {//Davies Livestock Research Centre/ ; }, mesh = {*DNA Methylation ; Animals ; Cattle/genetics ; *Polymorphism, Single Nucleotide ; *CpG Islands ; Genome ; Whole Genome Sequencing/methods ; Breeding ; Epigenome ; }, abstract = {BACKGROUND: Most DNA methylation studies have used a single reference genome with little attention paid to the bias introduced due to the reference chosen. Reference genome artifacts and genetic variation, including single nucleotide polymorphisms (SNPs) and structural variants (SVs), can lead to differences in methylation sites (CpGs) between individuals of the same species. We analyzed whole-genome bisulfite sequencing data from the fetal liver of Angus (Bos taurus taurus), Brahman (Bos taurus indicus), and reciprocally crossed samples. 
Using reference genomes for each breed from the Bovine Pangenome Consortium, we investigated the influence of reference genome choice on the breed and parent-of-origin effects in methylome analyses.

RESULTS: Our findings revealed that ∼75% of CpG sites were shared between Angus and Brahman, ∼5% were breed specific, and ∼20% were unresolved. We demonstrated up to ∼2% quantification bias in global methylation when an incorrect reference genome was used. Furthermore, we found that SNPs impacted CpGs 13 times more than other autosomal sites (P < $5 \times {10}^{ - 324}$) and SVs contained 1.18 times (P < $5 \times {10}^{ - 324}$) more CpGs than non-SVs. We found a poor overlap between differentially methylated regions (DMRs) and differentially expressed genes (DEGs) and suggest that DMRs may be impacting enhancers that target these DEGs. DMRs overlapped with imprinted genes, of which 1, DGAT1, which is important for fat metabolism and weight gain, was found in the breed-specific and sire-of-origin comparisons.

CONCLUSIONS: This work demonstrates the need to consider reference genome effects to explore genetic and epigenetic differences accurately and identify DMRs involved in controlling certain genes.}, } @article {pmid39433878, year = {2024}, author = {Garrison, E and Guarracino, A and Heumos, S and Villani, F and Bao, Z and Tattini, L and Hagmann, J and Vorbrugg, S and Marco-Sola, S and Kubica, C and Ashbrook, DG and Thorell, K and Rusholme-Pilcher, RL and Liti, G and Rudbeck, E and Golicz, AA and Nahnsen, S and Yang, Z and Mwaniki, MN and Nobrega, FL and Wu, Y and Chen, H and de Ligt, J and Sudmant, PH and Huang, S and Weigel, D and Soranzo, N and Colonna, V and Williams, RW and Prins, P}, title = {Building pangenome graphs.}, journal = {Nature methods}, volume = {}, number = {}, pages = {}, pmid = {39433878}, issn = {1548-7105}, support = {U01DA047638//U.S. Department of Health & Human Services | National Institutes of Health (NIH)/ ; R01GM123489//U.S. Department of Health & Human Services | National Institutes of Health (NIH)/ ; 2118709//National Science Foundation (NSF)/ ; Center for Integrative and Translational Genomics//University of Tennessee (UT)/ ; }, abstract = {Pangenome graphs can represent all variation between multiple reference genomes, but current approaches to build them exclude complex sequences or are based upon a single reference. In response, we developed the PanGenome Graph Builder, a pipeline for constructing pangenome graphs without bias or exclusion. 
The PanGenome Graph Builder uses all-to-all alignments to build a variation graph in which we can identify variation, measure conservation, detect recombination events and infer phylogenetic relationships.}, } @article {pmid39433877, year = {2024}, author = {Chikhi, R and Dufresne, Y and Medvedev, P}, title = {Constructing and personalizing population pangenome graphs.}, journal = {Nature methods}, volume = {}, number = {}, pages = {}, pmid = {39433877}, issn = {1548-7105}, support = {ANR-22-CE45-0007//Agence Nationale de la Recherche (French National Research Agency)/ ; ANR-19-CE45-0008//Agence Nationale de la Recherche (French National Research Agency)/ ; ANR-19-P3IA-0001//Agence Nationale de la Recherche (French National Research Agency)/ ; ANR-21-CE46-0012-03//Agence Nationale de la Recherche (French National Research Agency)/ ; 872539//European Commission (EC)/ ; 956229//European Commission (EC)/ ; 101047160//European Commission (EC)/ ; 101088572//European Commission (EC)/ ; DBI2138585//National Science Foundation (NSF)/ ; R01GM146462//U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)/ ; }, } @article {pmid39432666, year = {2024}, author = {Ford, MKB and Hari, A and Zhou, Q and Numanagić, I and Cenk Sahinalp, S}, title = {Biologically-informed Killer cell immunoglobulin-like receptor (KIR) gene annotation tool.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae622}, pmid = {39432666}, issn = {1367-4811}, abstract = {SUMMARY: Natural killer (NK) cells are essential components of the innate immune system, with their activity significantly regulated by Killer cell Immunoglobulin-like Receptors (KIRs). The diversity and structural complexity of KIR genes present significant challenges for accurate genotyping, essential for understanding NK cell functions and their implications in health and disease. 
Traditional genotyping methods struggle with the variable nature of KIR genes, leading to inaccuracies that can impede immunogenetic research. These challenges extend to high-quality phased assemblies, which have been recently popularized by the Human Pangenome Consortium. This paper introduces BAKIR (Biologically-informed Annotator for KIR locus), a tailored computational tool designed to overcome the challenges of KIR genotyping and annotation on high-quality, phased genome assemblies. BAKIR aims to enhance the accuracy of KIR gene annotations by structuring its annotation pipeline around identifying key functional mutations, thereby improving the identification and subsequent relevance of gene and allele calls. It uses a multi-stage mapping, alignment, and variant calling process to ensure high-precision gene and allele identification, while also maintaining high recall for sequences that are significantly mutated or truncated relative to the known allele database. BAKIR has been evaluated on a subset of the HPRC assemblies, where BAKIR was able to improve many of the associated annotations and call novel variants. BAKIR is freely available on GitHub, offering ease of access and use through multiple installation methods, including pip, conda, and singularity container, and is equipped with a user-friendly command-line interface, thereby promoting its adoption in the scientific community.

BAKIR is available at github.com/algo-cancer/bakir.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid39429921, year = {2024}, author = {van Dam, L and Cruz-Morales, P and Rodriguez Valerón, N and Calheiros de Carvalho, A and Prado Vásquez, D and Lübke, M and Kloster Pedersen, L and Munk, R and Sommer, MOA and Jahn, LJ}, title = {GastronOmics: Edibility and safety of mycelium of the oyster mushroom Pleurotus ostreatus.}, journal = {Current research in food science}, volume = {9}, number = {}, pages = {100866}, pmid = {39429921}, issn = {2665-9271}, abstract = {Food production is one of the most environmentally damaging human activities. In the face of climate change, it is essential to rethink our dietary habits and explore potential alternative foods catering both towards human and planetary needs. Fungal mycelium might be an attractive alternative protein source due to its rapid growth on sustainable substrates as well as promising nutritional and organoleptic properties. The natural biodiversity of filamentous fungi is vast and represents an untapped reservoir for food innovation. However, fungi are known to produce bioactive compounds that may affect human health, both positively and negatively. To narrow the search for safe and culinarily attractive fungal species, mycelia of edible fruiting-body forming fungi provide a promising starting point. Here, we explore whether the culinary attractiveness and safety of the commonly eaten mushroom, Pleurotus ostreatus, can also be translated to its mycelium. Whole-genome sequencing and pan-genome analysis revealed a high degree of genetic variability within the genus Pleurotus, suggesting that gastronomic traits as well as food safety may differ between strains. A representative strain, P. ostreatus M2191, was further analyzed for the food safety, nutritional properties and culinary applicability of its mycelium. No regulated mycotoxins were detected in either the fruiting body nor the mycelium. Yet, P. 
ostreatus is known to produce four peptide toxins, Ostreatin, Ostreolysin and Pleurotolysin A/B. These were found to be lower in the mycelium compared to fruiting bodies, which are already considered safe for consumption. Instead, a number of secondary metabolites with potential health benefits were detected in the fungal mycelium. In silico analysis of the proteome suggested low allergenicity. In addition, the fruiting body and the mycelium showed similar nutritional value, which was dependent on the growth substrate. To highlight the culinary potential of mycelium, we created a dish served at the two-star restaurant the Alchemist in Copenhagen, Denmark. Sensory analysis of the mycelium dish by an untrained consumer panel indicated consumer liking and openness to fungal mycelia. Based on sustainability, safety, culinary potential, and consumer acceptance, our findings suggest that P. ostreatus mycelium has great potential for use as a novel food source.}, } @article {pmid39428470, year = {2024}, author = {Soares, R and Fonseca, BM and Nash, BW and Paquete, CM and Louro, RO}, title = {A survey of the Desulfuromonadia "cytochromome" provides a glimpse of the unexplored diversity of multiheme cytochromes in nature.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {982}, pmid = {39428470}, issn = {1471-2164}, support = {PTDC/BIA-BQM/4143/2021//Fundação para a Ciência e a Tecnologia/ ; }, mesh = {*Heme/metabolism/chemistry ; Phylogeny ; Genetic Variation ; Computational Biology/methods ; Models, Molecular ; Cytochromes c/metabolism/genetics/chemistry ; Amino Acid Motifs ; }, abstract = {BACKGROUND: Multiheme cytochromes c (MHC) provide prokaryotes with a broad metabolic versatility that contributes to their role in the biogeochemical cycling of the elements and in energy production in bioelectrochemical systems. However, MHC have only been isolated and studied in detail from a limited number of species. Among these, Desulfuromonadia spp. 
are particularly MHC-rich. To obtain a broad view of the diversity of MHC, we employed bioinformatic tools to study the cytochromome encoded in the genomes of the Desulfuromonadia class.

RESULTS: We found that the distribution of the MHC families follows a different pattern between the two orders of the Desulfuromonadia class and that there is great diversity in the number of heme-binding motifs in MHC. However, the vast majority of MHC have up to 12 heme-binding motifs. MHC predicted to be extracellular are the least conserved and show high diversity, whereas inner membrane MHC are well conserved and show lower diversity. Although the most prevalent MHC have homologues already characterized, nearly half of the MHC families in the Desulfuromonadia class have no known characterized homologues. AlphaFold2 was employed to predict their 3D structures. This provides an atlas of novel MHC, including examples with high beta-sheet content and nanowire MHC with unprecedented high numbers of putative heme cofactors per polypeptide.

CONCLUSIONS: This work illuminates for the first time the universe of experimentally uncharacterized cytochromes that are likely to contribute to the metabolic versatility and to the fitness of Desulfuromonadia in diverse environmental conditions and to drive biotechnological applications of these organisms.}, } @article {pmid38979181, year = {2024}, author = {Gluck-Thaler, E and Forsythe, A and Puerner, C and Stajich, JE and Croll, D and Cramer, RA and Vogan, AA}, title = {Giant transposons promote strain heterogeneity in a major fungal pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.06.28.601215}, pmid = {38979181}, issn = {2692-8205}, abstract = {UNLABELLED: Fungal infections are difficult to prevent and treat in large part due to strain heterogeneity. However, the genetic mechanisms driving pathogen variation remain poorly understood. Here, we determined the extent to which Starships—giant transposons capable of mobilizing numerous fungal genes—generate genetic and phenotypic variability in the human pathogen Aspergillus fumigatus. We analyzed 519 diverse strains, including 12 newly sequenced with long-read technology, to reveal 20 distinct Starships that are generating genomic heterogeneity over timescales potentially relevant for experimental reproducibility. Starship-mobilized genes encode diverse functions, including biofilm-related virulence factors and biosynthetic gene clusters, and many are differentially expressed during infection and antifungal exposure in a strain-specific manner. These findings support a new model of fungal evolution wherein Starships help generate variation in gene content and expression among fungal strains. Together, our results demonstrate that Starships are a previously hidden mechanism generating genotypic and, in turn, phenotypic heterogeneity in a major human fungal pathogen.

IMPORTANCE: No "one size fits all" option exists for treating fungal infections in large part due to genetic and phenotypic variation among strains. Accounting for strain heterogeneity is thus fundamental for developing efficacious treatments and strategies for safeguarding human health. Here, we report significant progress towards achieving this goal by uncovering a previously hidden mechanism generating heterogeneity in the major human fungal pathogen Aspergillus fumigatus : giant transposons called Starships that span dozens of kilobases and mobilize fungal genes as cargo. By conducting the first systematic investigation of these unusual transposons in a single fungal species, we demonstrate their contributions to population-level variation at the genome, pangenome and transcriptome levels. The Starship atlas we developed will not only help account for variation introduced by these elements in laboratory experiments but will serve as a foundational resource for determining how Starships shape clinically-relevant phenotypes, such as antifungal resistance and pathogenicity.}, } @article {pmid39423715, year = {2024}, author = {Munim, MA and Tanni, AA and Hossain, MM and Chakma, K and Mannan, A and Islam, SMR and Tiwari, JG and Gupta, SD}, title = {Whole genome sequencing of multidrug-resistant Klebsiella pneumoniae from poultry in Noakhali, Bangladesh: Assessing risk of transmission to humans in a pilot study.}, journal = {Comparative immunology, microbiology and infectious diseases}, volume = {114}, number = {}, pages = {102246}, doi = {10.1016/j.cimid.2024.102246}, pmid = {39423715}, issn = {1878-1667}, abstract = {BACKGROUND: Multi-drug resistant (MDR) Klebsiella pneumoniae is a public health concern due to its presence in Bangladeshi poultry products and its ability to spread resistance genes. This study genetically characterizes a distinct MDR K. 
pneumoniae isolate from the gut of poultry in Noakhali, Bangladesh, offering insights into its resistance mechanisms and public health impact.

METHODS: Klebsiella pneumoniae isolates from broiler and layer poultry were identified using biochemical and molecular analyses. Eleven isolates were tested for antibiotic sensitivity and categorized by their Multiple Antibiotic Resistance Index (MARI) profiles. The isolate with the highest MARI was selected for whole-genome sequencing using Illumina technology. The sequencing data were analyzed for genome annotation, pan-genome analysis, genome similarities, sequence type identification, and the identification of genetic determinants of resistance and virulence genes.

RESULT: We identified 10 MARI profiles among 11 K. pneumoniae isolates, with values ranging from 0.64 to 0.94. The highest MARI of 0.94 was found in an isolate from a layer poultry. This isolate's genome, 5,401,789 base pairs long with 89.6 % coverage, showed potential inter-species dissemination, as indicated by core genome phylogenetic analysis. It possessed genes conferring resistance to fluoroquinolones, aminoglycosides, β-lactams, folate pathway antagonists, fosfomycin, macrolides, quinolones, rifamycin, tetracyclines, and polymyxins, including colistin.

CONCLUSION: Poultry serve as reservoirs for MDR K. pneumoniae, which can spread to other species and pose significant health risks. Rigorous monitoring of antibiotic use and genetic characterization of MDR bacterial isolates are essential to mitigate this threat.}, } @article {pmid39423139, year = {2024}, author = {Gao, S and Zhang, Y and Bush, SJ and Wang, B and Yang, X and Ye, K}, title = {Centromere Landscapes Resolved from Hundreds of Human Genomes.}, journal = {Genomics, proteomics \& bioinformatics}, volume = {}, number = {}, pages = {}, doi = {10.1093/gpbjnl/qzae071}, pmid = {39423139}, issn = {2210-3244}, abstract = {High-fidelity (HiFi) sequencing has facilitated the assembly and analysis of the most repetitive region of the genome, the centromere. Nevertheless, our current understanding of human centromeres is based on a relatively small number of telomere-to-telomere assemblies, which has not yet captured its full diversity. In this study, we investigated the genomic diversity of human centromere higher order repeats (HORs) via both HiFi reads and haplotype-resolved assemblies from hundreds of samples drawn from ongoing pangenome-sequencing projects and reprocessed them via a novel HOR annotation pipeline, HiCAT-human. We used this wealth of data to provide a global survey of the centromeric HOR landscape; in particular, we found that 23 HORs presented significant copy number variability between populations. We detected three centromere genotypes with unbalanced population frequencies on chromosomes 5, 8, and 17. 
An inter-assembly comparison of HOR loci further revealed that while HOR array structures are diverse, they nevertheless tend to form a number of specific landscapes, each exhibiting different levels of HOR subunit expansion and possibly reflecting a cyclical evolutionary transition from homogeneous to nested structures and back.}, } @article {pmid39421249, year = {2024}, author = {Mueller, KD and Panzetta, ME and Davey, L and McCann, JR and Rawls, JF and Flores, GE and Valdivia, RH}, title = {Pangenomic analysis identifies correlations between Akkermansia species and subspecies and human health outcomes.}, journal = {Microbiome research reports}, volume = {3}, number = {3}, pages = {33}, pmid = {39421249}, issn = {2771-5965}, abstract = {Aim: Akkermansia are common members of the human gastrointestinal microbiota. The prevalence of these mucophilic bacteria, especially Akkermansia muciniphila (A. muciniphila), correlates with immunological and metabolic health. The genus Akkermansia in humans includes species with significantly larger genomes than A. muciniphila, leading us to postulate that this added genetic content may influence how they impact human metabolic and immunological health. Methods: We conducted a pangenomic analysis of 234 Akkermansia complete or near-complete genomes. We also used high-resolution species and subspecies assignments to reanalyze publicly available metagenomic datasets to determine if there are relationships between Akkermansia species and A. muciniphila clades with various disease outcomes. Results: Analysis of genome-wide average nucleotide identity, 16S rRNA gene identity, conservation of core Akkermansia genes, and analysis of the fatty acid composition of representative isolates support the partitioning of the genus Akkermansia into several species. In addition, A. muciniphila sensu stricto, the most prevalent Akkermansia species in humans, should be subdivided into two subspecies. 
For a pediatric cohort, we observed species-specific correlations between Akkermansia abundance with baseline obesity or after various interventions. For inflammatory bowel disease cohorts, we identified a decreased abundance of Akkermansia in patients with ulcerative colitis or Crohn's disease, which was species and subspecies-dependent. In patients undergoing immune checkpoint inhibitor therapies for non-small cell lung carcinoma, we observed a significant association between one A. muciniphila subspecies and survival outcomes. Conclusion: Our findings suggest that the prevalence of specific Akkermansia species and/or subspecies can be crucial in evaluating their association with human health, particularly in different disease contexts, and is an important consideration for their use as probiotics.}, } @article {pmid39417972, year = {2024}, author = {Chaudhari, DN and Ahire, JJ and Devkatte, AN and Kulthe, AA}, title = {Complete Genome Sequence and Probiotic Characterization of Lactobacillus delbrueckii subsp. Indicus DC-3 Isolated from Traditional Indigenous Fermented Milk.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {39417972}, issn = {1867-1314}, abstract = {In this study, Lactobacillus delbrueckii subsp. indicus DC-3 was isolated from Indian traditional indigenous fermented milk Dahi and identified using whole genome sequencing. The safety of the strain was evaluated using genetic and phenotypic analyses, such as the presence of virulence factors, mobile and insertion elements, plasmids, antibiotic resistance, etc. Besides this, the strain was comprehensively investigated for in vitro probiotic traits, biofilm formation, antibacterials, and exopolysaccharide (EPS) production. 
In results, the strain showed a single circular chromosome (3,145,837 bp) with a GC content of 56.73%, a higher number of accessory and unique genes, an open pan-genome, and the absence of mobile and insertion elements, plasmids, virulence, and transmissible antibiotic resistance genes. The strain was capable of surviving in gastric juice (83% viability at 3 h) and intestinal juice (71% viability at 6 h) and showed 42.5% autoaggregation, adhesion to mucin, 8.7% adhesion to xylene, and 8.3% adhesion to Caco-2 cells. The γ-hemolytic nature, usual antibiotic susceptibility profile, and negative results for mucin and gelatin degradation ensure the safety of the strain. The strain produced 10.5 g/L of D-lactic acid and hydrogen peroxide, capable of inhibiting and co-aggregating Escherichia coli MTCC 1687, Proteus mirabilis MTCC 425, and Candida albicans ATCC 14053. In addition, the strain showed 90 mg/L EPS (48 h) and biofilm formation. In conclusion, this study demonstrates that L. delbrueckii subsp. indicus DC-3 is unique and different than previously reported L. delbrueckii subsp. indicus strains and is a safe potential probiotic candidate.}, } @article {pmid39417074, year = {2024}, author = {Wang, L and Chen, S and Xing, M and Dong, L and Zhu, H and Lin, Y and Li, J and Sun, T and Zhu, X and Wang, X}, title = {Genome characterization of Shewanella algae in Hainan Province, China.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1474871}, doi = {10.3389/fmicb.2024.1474871}, pmid = {39417074}, issn = {1664-302X}, abstract = {Shewanella algae is an emerging marine zoonotic pathogen. In this study, we first reported the Shewanella algae infections in patients and animals in Hainan Province, China. Currently, there is still relatively little known about the whole-genome characteristics of Shewanella algae in most tropical regions, including in southern China. 
Here, we sequenced the 62 Shewanella algae strains isolated from Hainan Province and combined them with the whole-genome sequences of 144 Shewanella algae genomes from public databases to analyze genomic features. Phylogenetic analysis revealed that Shewanella algae is widely distributed in the marine environments of both temperate and tropical countries, exhibiting close phylogenetic relationships with genomes isolated from patients, animals, and plants, thereby confirming that exposure to marine environments is a risk factor for Shewanella algae infections. Average nucleotide identity analysis indicated that the clonally identical genomes could be isolated from patients with different sample types at different times. Pan-genome analysis identified a total of 21,909 genes, including 1,563 core genes, 8,292 strain-specific genes, and 12,054 accessory genes. Multiple putative virulence-associated genes were identified, encompassing 14 categories and 16 subcategories, with 171 distinct virulence factors. Three different plasmid replicon types were detected in 33 genomes. Eleven classes of antibiotic resistance genes and 352 integrons were identified. Antimicrobial susceptibility testing revealed a high resistance rate to imipenem and colistin among the strains studied, with 5 strains exhibiting multidrug resistance. However, they were all sensitive to amikacin, minocycline, and tigecycline. Our findings clarify the genomic characteristics and population structure of Shewanella algae in Hainan Province. 
The results offer insights into the genetic basis of pathogenicity in Shewanella algae and enhance our understanding of its global phylogeography.}, } @article {pmid39416140, year = {2024}, author = {Majernik, SN and Beaver, L and Bradley, PH}, title = {Small amounts of misassembly can have disproportionate effects on pangenome-based metagenomic analyses.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.10.11.617902}, pmid = {39416140}, issn = {2692-8205}, abstract = {Individual genes from microbiomes can drive host-level phenotypes. To help identify such candidate genes, several recent tools estimate microbial gene copy numbers directly from metagenomes. These tools rely on alignments to pangenomes, which in turn are derived from the set of all individual genomes from one species. While large-scale metagenomic assembly efforts have made pangenome estimates more complete, mixed communities can also introduce contamination into assemblies, and it is unknown how robust pangenome-based metagenomic analyses are to these errors. To gain insight into this problem, we re-analyzed a case-control study of the gut microbiome in cirrhosis, focusing on commensal Clostridia previously implicated in this disease. We tested for differentially prevalent genes in the Lachnospiraceae , then investigated which were likely to be contaminants using sequence similarity searches. Out of 86 differentially prevalent genes, we found that 33 (38%) were probably contaminants originating in taxa such as Veillonella and Haemophilus , unrelated genera that were independently correlated with disease status. Our results demonstrate that even small amounts of contamination in metagenome assemblies, below typical quality thresholds, can threaten to overwhelm gene-level metagenomic analyses. However, we also show that such contaminants can be accurately identified using a method based on gene-to-species correlation. 
After removing these contaminants, we observe that several flagellar motility gene clusters in the Lachnospira eligens pangenome are associated with cirrhosis status. We have integrated our analyses into an analysis and visualization pipeline, PanSweep, that can automatically identify cases where pangenome contamination may bias the results of gene-resolved analyses.}, } @article {pmid39415996, year = {2024}, author = {Rubin, J and van Waaij, J and Kraft, L and Sirén, J and Sackett, PW and Renaud, G}, title = {SAFARI: Pangenome Alignment of Ancient DNA Using Purine/Pyrimidine Encodings.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.08.12.607489}, pmid = {39415996}, issn = {2692-8205}, abstract = {Aligning DNA sequences retrieved from fossils or other paleontological artifacts, referred to as ancient DNA, is particularly challenging due to the short sequence length and chemical damage which creates a specific pattern of substitution (C→T and G→A) in addition to the heightened divergence between the sample and the reference genome thus exacerbating reference bias. This bias can be mitigated by aligning to pangenome graphs to incorporate documented organismic variation, but this approach still suffers from substitution patterns due to chemical damage. We introduce a novel methodology introducing the RYmer index, a variant of the commonly-used minimizer index which represents purines (A,G) and pyrimidines (C,T) as R and Y respectively. This creates an indexing scheme robust to the aforementioned chemical damage. We implemented SAFARI , an ancient DNA damage-aware version of the pangenome aligner vg giraffe which uses RYmers to rescue alignments containing deaminated seeds. We show that our approach produces more correct alignments from ancient DNA sequences than current approaches while maintaining a tolerable rate of spurious alignments. 
In addition, we demonstrate that our algorithm improves the estimate of the rate of ancient DNA damage, especially for highly damaged samples. Crucially, we show that this improved alignment can directly translate into better insights gained from the data by showcasing its integration with a number of extant pangenome tools.}, } @article {pmid39415896, year = {2024}, author = {Ghatak, S and Milton, AAP and Das, S and Momin, KM and Srinivas, K and Pyngrope, DA and Priya, GB}, title = {Campylobacter coli of porcine origin exhibits an open pan-genome within a single clonal complex: insights from comparative genomic analysis.}, journal = {Frontiers in cellular and infection microbiology}, volume = {14}, number = {}, pages = {1449856}, doi = {10.3389/fcimb.2024.1449856}, pmid = {39415896}, issn = {2235-2988}, mesh = {Animals ; Swine ; *Campylobacter coli/genetics/isolation & purification/classification ; *Genome, Bacterial ; *Phylogeny ; *Multilocus Sequence Typing ; *Genomics ; *Campylobacter Infections/microbiology/veterinary ; Feces/microbiology ; Swine Diseases/microbiology ; Bacteriophages/genetics ; Virulence Factors/genetics ; }, abstract = {INTRODUCTION: Although Campylobacter spp., including Campylobacter coli, have emerged as important zoonotic foodborne pathogens globally, the understanding of the genomic epidemiology of C. coli of porcine origin is limited.

METHODS: As pigs are an important reservoir of C. coli, we analyzed C. coli genomes that were isolated (n = 3) from pigs and sequenced (this study) them along with all other C. coli genomes for which pig intestines, pig feces, and pigs were mentioned as sources in the NCBI database up to January 6, 2023. In this paper, we report the pan-genomic features, the multi-locus sequence types, the resistome, virulome, and mobilome, and the phylogenomic analysis of these organisms that were obtained from pigs.

RESULTS AND DISCUSSION: Our analysis revealed that, in addition to having an open pan-genome, majority (63%) of the typeable isolates of C. coli of pig origin belonged to a single clonal complex, ST-828. The resistome of these C. coli isolates was predominated by the genes tetO (53%), blaOXA-193 (49%), and APH (3')-IIIa (21%); however, the virulome analysis revealed a core set of 37 virulence genes. Analysis of the mobile genetic elements in the genomes revealed wide diversity of the plasmids and bacteriophages, while 30 transposons were common to all genomes of C. coli of porcine origin. Phylogenomic analysis showed two discernible clusters comprising isolates originating from Japan and another set of isolates comprising mostly copies of a type strain stored in three different culture collections.}, } @article {pmid39415203, year = {2024}, author = {Tong, X and Luo, D and Leung, MHY and Lee, JYY and Shen, Z and Jiang, W and Mason, CE and Lee, PKH}, title = {Diverse and specialized metabolic capabilities of microbes in oligotrophic built environments.}, journal = {Microbiome}, volume = {12}, number = {1}, pages = {198}, pmid = {39415203}, issn = {2049-2618}, support = {BK20230230//Jiangsu Science and Technology Programme/ ; 11214721//Hong Kong Research Grants Council, General Research Fund/ ; R1016-20F//Hong Kong Research Grants Council, Research Impact Fund/ ; }, mesh = {Humans ; Hong Kong ; *Microbiota ; *Built Environment ; *Metagenome ; *Phylogeny ; *Bacteria/classification/genetics/metabolism/isolation & purification ; Skin/microbiology ; Micrococcus luteus/genetics/metabolism ; Genome, Bacterial ; }, abstract = {BACKGROUND: Built environments (BEs) are typically considered to be oligotrophic and harsh environments for microbial communities under normal, non-damp conditions. However, the metabolic functions of microbial inhabitants in BEs remain poorly understood. 
This study aimed to shed light on the functional capabilities of microbes in BEs by analyzing 860 representative metagenome-assembled genomes (rMAGs) reconstructed from 738 samples collected from BEs across the city of Hong Kong and from the skin surfaces of human occupants. The study specifically focused on the metabolic functions of rMAGs that are either phylogenetically novel or prevalent in BEs.

RESULTS: The diversity and composition of BE microbiomes were primarily shaped by the sample type, with Micrococcus luteus and Cutibacterium acnes being prevalent. The metabolic functions of rMAGs varied significantly based on taxonomy, even at the strain level. A novel strain affiliated with the Candidatus class Xenobia in the Candidatus phylum Eremiobacterota and two novel strains affiliated with the superphylum Patescibacteria exhibited unique functions compared with their close relatives, potentially aiding their survival in BEs and on human skins. The novel strains in the class Xenobia possessed genes for transporting nitrate and nitrite as nitrogen sources and nitrosative stress mitigation induced by nitric oxide during denitrification. The two novel Patescibacteria strains both possessed a broad array of genes for amino acid and trace element transport, while one of them carried genes for carotenoid and ubiquinone biosynthesis. The globally prevalent M. luteus in BEs displayed a large and open pangenome, with high infraspecific genomic diversity contributed by 11 conspecific strains recovered from BEs in a single geographic region. The versatile metabolic functions encoded in the large accessory genomes of M. luteus may contribute to its global ubiquity and specialization in BEs.

CONCLUSIONS: This study illustrates that the microbial inhabitants of BEs possess metabolic potentials that enable them to tolerate and counter different biotic and abiotic conditions. Additionally, these microbes can efficiently utilize various limited residual resources from occupant activities, potentially enhancing their survival and persistence within BEs. A better understanding of the metabolic functions of BE microbes will ultimately facilitate the development of strategies to create a healthy indoor microbiome. Video Abstract.}, } @article {pmid39415087, year = {2024}, author = {Zdąbłasz, K and Lisiecka, A and Dojer, N}, title = {Sequence Flow: interactive web application for visualizing partial order alignments.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {973}, pmid = {39415087}, issn = {1471-2164}, support = {01/IDUB/2019/04//Polish Ministry of Science and Higher Education/ ; 01/IDUB/2019/04//Polish Ministry of Science and Higher Education/ ; 01/IDUB/2019/04//Polish Ministry of Science and Higher Education/ ; }, mesh = {*Internet ; *Software ; *Sequence Alignment/methods ; Computational Biology/methods ; User-Computer Interface ; Computer Graphics ; }, abstract = {BACKGROUND: Multiple sequence alignment (MSA) has proven extremely useful in computational biology, especially in inferring evolutionary relationships via phylogenetic analysis and providing insight into protein structure and function. An alternative to the standard MSA model is partial order alignment (POA), in which aligned sequences are represented as paths in a graph rather than rows in a matrix. While the POA model has proven useful in several applications (e.g. sequencing reads assembly and pangenome structure exploration), we lack efficient visualization tools that could highlight its advantages.

RESULTS: We propose Sequence Flow - a web application designed to address the above problem. Sequence Flow presents the POA as a Sankey diagram, a kind of graph visualisation typically used for graphs representing flowcharts. Sequence Flow enables interactive alignment exploration, including fragment selection, highlighting a selected group of sequences, modification of the position of graph nodes, structure simplification etc. After adjustment, the visualization can be saved as a high-quality graphic file. Thanks to the use of SanKEY.js - a JavaScript library for creating Sankey diagrams, designed specifically to visualize POAs, Sequence Flow provides satisfactory performance even with large alignments.

CONCLUSIONS: We provide Sankey diagram-based POA visualization tools for both end users (Sequence Flow) and bioinformatic software developers (SanKEY.js). Sequence Flow webservice is available at https://sequenceflow.mimuw.edu.pl/ . The source code for SanKEY.js is available at https://github.com/Krzysiekzd/SanKEY.js and for Sequence Flow at https://github.com/Krzysiekzd/SequenceFlow .}, } @article {pmid39414821, year = {2024}, author = {Groza, C and Chen, X and Wheeler, TJ and Bourque, G and Goubert, C}, title = {A unified framework to analyze transposable element insertion polymorphisms using graph genomes.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {8915}, pmid = {39414821}, issn = {2041-1723}, mesh = {*DNA Transposable Elements/genetics ; *Polymorphism, Genetic ; Humans ; *Drosophila melanogaster/genetics ; *Zea mays/genetics ; Animals ; Algorithms ; Genome, Plant/genetics ; Genomics/methods ; Software ; Sequence Analysis, DNA/methods ; }, abstract = {Transposable elements are ubiquitous mobile DNA sequences generating insertion polymorphisms, contributing to genomic diversity. We present GraffiTE, a flexible pipeline to analyze polymorphic mobile elements insertions. By integrating state-of-the-art structural variant detection algorithms and graph genomes, GraffiTE identifies polymorphic mobile elements from genomic assemblies or long-read sequencing data, and genotypes these variants using short or long read sets. Benchmarking on simulated and real datasets reports high precision and recall rates. GraffiTE is designed to allow non-expert users to perform comprehensive analyses, including in models with limited transposable element knowledge and is compatible with various sequencing technologies. Here, we demonstrate the versatility of GraffiTE by analyzing human, Drosophila melanogaster, maize, and Cannabis sativa pangenome data. 
These analyses reveal the landscapes of polymorphic mobile elements and their frequency variations across individuals, strains, and cultivars.}, } @article {pmid39408827, year = {2024}, author = {Urrutia, C and Leyton-Carcaman, B and Abanto Marin, M}, title = {Contribution of the Mobilome to the Configuration of the Resistome of Corynebacterium striatum.}, journal = {International journal of molecular sciences}, volume = {25}, number = {19}, pages = {}, doi = {10.3390/ijms251910499}, pmid = {39408827}, issn = {1422-0067}, mesh = {*Corynebacterium/genetics ; *Phylogeny ; Humans ; Plasmids/genetics ; Genome, Bacterial ; Corynebacterium Infections/microbiology ; Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; DNA Transposable Elements/genetics ; Integrons/genetics ; Drug Resistance, Multiple, Bacterial/genetics ; Interspersed Repetitive Sequences/genetics ; }, abstract = {Corynebacterium striatum, present in the microbiota of human skin and nasal mucosa, has recently emerged as a causative agent of hospital-acquired infections, notable for its resistance to multiple antimicrobials. Its mobilome comprises several mobile genetic elements, such as plasmids, transposons, insertion sequences and integrons, which contribute to the acquisition of antimicrobial resistance genes. This study analyzes the contribution of the C. striatum mobilome in the transfer and dissemination of resistance genes. In addition, integrative and conjugative elements (ICEs), essential in the dissemination of resistance genes between bacterial populations, whose role in C. striatum has not yet been studied, are examined. This study examined 365 C. striatum genomes obtained from the NCBI Pathogen Detection database. Phylogenetic and pangenome analyses were performed, the resistance profile of the bacterium was recognized, and mobile elements, including putative ICE, were detected. 
Bioinformatic analyses identified 20 antimicrobial resistance genes in this species, with the Ermx gene being the most predominant. Resistance genes were mainly associated with plasmid sequence regions and class 1 integrons. Although an ICE was detected, no resistance genes linked to this element were found. This study provided valuable information on the geographic spread and prevalence of outbreaks observed through phylogenetic and pangenome analyses, along with identifying antimicrobial resistance genes and mobile genetic elements that carry many of the resistance genes and may be the subject of future research and therapeutic approaches.}, } @article {pmid39402664, year = {2024}, author = {Ndiaye, M and Prieto-Baños, S and Fitzgerald, LM and Yazdizadeh Kharrazi, A and Oreshkov, S and Dessimoz, C and Sedlazeck, FJ and Glover, N and Majidian, S}, title = {When less is more: sketching with minimizers in genomics.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {270}, pmid = {39402664}, issn = {1474-760X}, abstract = {The exponential increase in sequencing data calls for conceptual and computational advances to extract useful biological insights. One such advance, minimizers, allows for reducing the quantity of data handled while maintaining some of its key properties. We provide a basic introduction to minimizers, cover recent methodological developments, and review the diverse applications of minimizers to analyze genomic data, including de novo genome assembly, metagenomics, read alignment, read correction, and pangenomes. We also touch on alternative data sketching techniques including universal hitting sets, syncmers, or strobemers. 
Minimizers and their alternatives have rapidly become indispensable tools for handling vast amounts of data.}, } @article {pmid39400347, year = {2024}, author = {Ma, A and Sun, J and Feng, L and Xue, Z and Wu, W and Song, B and Xiong, X and Wang, X and Han, B and Osbourn, A and Qi, X}, title = {Functional diversity of oxidosqualene cyclases in genus Oryza.}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.20175}, pmid = {39400347}, issn = {1469-8137}, support = {31920103003//National Natural Science Foundation of China/ ; BB/X01097X/1//the BBSRC Institute Strategic Programme Grant 'Harnessing Biosynthesis for Sustainable Food and Health (HBio)/ ; }, abstract = {Triterpene skeletons, catalyzing by 2,3-oxidosqualene cyclases (OSCs), are essential for synthesis of steroids and triterpenoids. In japonica rice cultivars Zhonghua11, a total of 12 OsOSCs have been found. While the catalytic functions of OsOSC1, 3, 4, 9, and 10 remain unclear, the functions of the other OsOSCs have been well studied. In this study, we conducted a comprehensive analysis of 12 OSC genes within genus Oryza with the aid of 63 genomes from cultivated and wild rice. We found that OSC genes are relatively conserved within genus Oryza with a few exceptions. Collinearity analysis further suggested that, throughout the evolutionary history of genus Oryza, the OSC genes have not undergone significant rearrangements or losses. Further functional analysis of 5 uncharacterized OSCs revealed that OsOSC10 was a friedelin synthase, which affected the development of rice grains. Additionally, the reconstructed ancestral sequences of Oryza OSC3 and Oryza OSC9 had lupeol synthase and poaceatapetol synthase activity, respectively. The discovery of friedelin synthase in rice unlocks a new catalytic path and biological function of OsOSC10. 
The pan-genome analysis of OSCs within genus Oryza gives insights into the evolutionary trajectory and products diversity of Oryza OSCs.}, } @article {pmid39400346, year = {2024}, author = {Heumos, S and Heuer, ML and Hanssen, F and Heumos, L and Guarracino, A and Heringer, P and Ehmele, P and Prins, P and Garrison, E and Nahnsen, S}, title = {Cluster-efficient pangenome graph construction with nf-core/pangenome.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae609}, pmid = {39400346}, issn = {1367-4811}, abstract = {MOTIVATION: Pangenome graphs offer a comprehensive way of capturing genomic variability across multiple genomes. However, current construction methods often introduce biases, excluding complex sequences or relying on references. The PanGenome Graph Builder (PGGB) addresses these issues. To date, though, there is no state-of-the-art pipeline allowing for easy deployment, efficient and dynamic use of available resources, and scalable usage at the same time.

RESULTS: To overcome these limitations, we present nf-core/pangenome, a reference-unbiased approach implemented in Nextflow following nf-core's best practices. Leveraging biocontainers ensures portability and seamless deployment in HPC environments. Unlike PGGB, nf-core/pangenome distributes alignments across cluster nodes, enabling scalability. Demonstrating its efficiency, we constructed pangenome graphs for 1000 human chromosome 19 haplotypes and 2146 E. coli sequences, achieving a two to threefold speedup compared to PGGB without increasing greenhouse gas emissions.

AVAILABILITY: Nf-core/pangenome is released under the MIT open-source license, available on GitHub and Zenodo, with documentation accessible at https://nf-co.re/pangenome/1.1.2/docs/usage.

SUPPLEMENTARY: Supplementary data are available at Bioinformatics online.}, } @article {pmid39398200, year = {2024}, author = {Roberts, MD and Davis, O and Josephs, EB and Williamson, RJ}, title = {k-mer-based approaches to bridging pangenomics and population genetics.}, journal = {ArXiv}, volume = {}, number = {}, pages = {}, pmid = {39398200}, issn = {2331-8422}, abstract = {Many commonly studied species now have more than one chromosome-scale genome assembly, revealing a large amount of genetic diversity previously missed by approaches that map short reads to a single reference. However, many species still lack multiple reference genomes and correctly aligning references to build pangenomes is challenging, limiting our ability to study this missing genomic variation in population genetics. Here, we argue that $k$-mers are a crucial stepping stone to bridging the reference-focused paradigms of population genetics with the reference-free paradigms of pangenomics. We review current literature on the uses of $k$-mers for performing three core components of most population genetics analyses: identifying, measuring, and explaining patterns of genetic variation. We also demonstrate how different $k$-mer-based measures of genetic variation behave in population genetic simulations according to the choice of $k$, depth of sequencing coverage, and degree of data compression. Overall, we find that $k$-mer-based measures of genetic diversity scale consistently with pairwise nucleotide diversity ($\pi$) up to values of about $\pi = 0.025$ ($R^2 = 0.97$) for neutrally evolving populations. For populations with even more variation, using shorter $k$-mers will maintain the scalability up to at least $\pi = 0.1$. Furthermore, in our simulated populations, $k$-mer dissimilarity values can be reliably approximated from counting bloom filters, highlighting a potential avenue to decreasing the memory burden of $k$-mer based genomic dissimilarity analyses. 
For future studies, there is a great opportunity to further develop methods to identifying selected loci using $k$-mers.}, } @article {pmid39397345, year = {2024}, author = {Wiersma, AT and Hamilton, JP and Vaillancourt, B and Brose, J and Awale, HE and Wright, EM and Kelly, JD and Buell, CR}, title = {k-mer genome-wide association study for anthracnose and BCMV resistance in a Phaseolus vulgaris Andean Diversity Panel.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20523}, doi = {10.1002/tpg2.20523}, pmid = {39397345}, issn = {1940-3372}, support = {//Georgia Research Alliance/ ; //University of Georgia/ ; //Michigan State University/ ; //Michigan State University Plant Resilience Institute/ ; //Georgia Seed Development/ ; 2019-67012-29717//National Institute of Food and Agriculture/ ; 2022-67013-37119//National Institute of Food and Agriculture/ ; }, abstract = {Access to broad genomic resources and closely linked marker-trait associations for common beans (Phaseolus vulgaris L.) can facilitate development of improved varieties with increased yield, improved market quality traits, and enhanced disease resistance. The emergence of virulent races of anthracnose (caused by Colletotrichum lindemuthianum) and bean common mosaic virus (BCMV) highlight the need for improved methods to identify and incorporate pan-genomic variation in breeding for disease resistance. We sequenced the P. vulgaris Andean Diversity Panel (ADP) and performed a genome-wide association study (GWAS) to identify associations for resistance to BCMV and eight races of anthracnose. Historical single nucleotide polymorphism (SNP)-chip and phenotypic data enabled a three-way comparison between SNP-chip, reference-based whole genome shotgun sequence (WGS)-SNP, and reference-free k-mer (short nucleotide subsequence) GWAS. 
Across all traits, there was excellent concordance between SNP-chip, WGS-SNP, and k-mer GWAS results-albeit at a much higher marker resolution for the WGS data sets. Significant k-mer haplotype variation revealed selection of the linked I-gene and Co-u traits in North American breeding lines and cultivars. Due to structural variation, only 9.1 to 47.3% of the significantly associated k-mers could be mapped to the reference genome. Thus, to determine the genetic context of cis-associated k-mers, we generated draft whole genome assemblies of four ADP accessions and identified an expanded local repertoire of disease resistance genes associated with resistance to anthracnose and BCMV. With access to variant data in the context of a pan-genome, high resolution mapping of agronomic traits for common bean is now feasible.}, } @article {pmid39387603, year = {2024}, author = {Liang, J and Liu, B and Christensen, MJ and Li, C and Zhang, X and Nan, Z}, title = {The effects of Pseudomonas strains isolated from Achnatherum inebrians on plant growth: A genomic perspective.}, journal = {Environmental microbiology reports}, volume = {16}, number = {5}, pages = {e70011}, doi = {10.1111/1758-2229.70011}, pmid = {39387603}, issn = {1758-2229}, support = {jbky-2022-ey21//Fundamental Research Funds for the Central Universities, Lanzhou University/ ; 31772665//National Natural Science Foundation of China/ ; 32061123004//National Natural Science Foundation of China/ ; 23ZDNA009//Gansu Provincial Science and Technology Major Projects/ ; }, mesh = {*Pseudomonas/genetics/isolation & purification/classification/growth & development ; *Endophytes/genetics/isolation & purification/classification ; *Genome, Bacterial/genetics ; China ; Phylogeny ; Poaceae/microbiology/growth & development ; Plant Leaves/microbiology ; Plant Development ; Epichloe/genetics/growth & development/physiology/isolation & purification ; Photosynthesis ; Genomics ; Genomic Islands ; }, abstract = {Achnatherum inebrians 
is a perennial grass widely distributed in northwest China. Nearly all wild A. inebrians plants are infected by Epichloë endophytes. In this study, bacteria from the phyllosphere were isolated from leaves of both endophyte-free and endophyte-infected A. inebrians and sequenced for identification. Pseudomonas, comprising 48.12% of the culturable bacterial communities, was the most dominant bacterial genus. Thirty-four strains from 12 Pseudomonas species were used to inoculate A. inebrians seeds and plants. Results indicated that Epichloë significantly increased the diversity and richness index of the phyllosphere. Pseudomonas Sp1, Sp3, Sp5 and Sp7 had a significantly positive effect on plant growth and photosynthesis, whereas Sp10, Sp11 and Sp12 had a significantly negative effect. Whole-genome and pan-genome analysis suggested that the variability in the effects of Pseudomonas on A. inebrians was related to differences in genome composition and genomic islands.}, } @article {pmid39387591, year = {2024}, author = {Zhang, M and Yin, Z and Chen, B and Yu, Z and Liang, J and Tian, X and Li, D and Deng, X and Peng, L}, title = {Investigation of Citrobacter freundii clinical isolates in a Chinese hospital during 2020-2022 revealed genomic characterization of an extremely drug-resistant C. freundii ST257 clinical strain GMU8049 co-carrying blaNDM-1 and a novel blaCMY variant.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0425423}, doi = {10.1128/spectrum.04254-23}, pmid = {39387591}, issn = {2165-0497}, abstract = {The emergence of multidrug-resistant Citrobacter freundii poses a significant threat to public health. C. freundii isolates were collected from clinical patients in a Chinese hospital during 2020-2022. An unusual strain, GMU8049, was not susceptible to any of the antibiotics tested, including the novel β-lactam/β-lactamase inhibitor combination ceftazidime-avibactam. 
Whole-genome sequencing (WGS) revealed that GMU8049 harbors a circular chromosome belonging to the rare ST257 and an IncX3 resistance plasmid. Genomic analysis revealed the coexistence of two β-lactamase genes, including plasmid-mediated blaNDM-1 and chromosomal blaCMY encoding a novel CMY variant, combined with an outer membrane porin deficiency, which may account for the extreme resistance to β-lactams. Conjugation experiment confirmed that the blaNDM-1 resistance gene located on pGMU8049 could be successfully transferred to Escherichia coli EC600. The novel CMY variant had an amino acid substitution at position 106 (N106S) compared to the closely related CMY-51. Additionally, a GMU8049-specific truncation in an OmpK37 variant that produces a premature stop codon. Moreover, a variety of chromosome-located efflux pump coding genes and virulence-related genes were also identified. Analysis of strain GMU8049 in the context of other C. freundii strains reveals an open pan-genome and the presence of mobile genetic elements that can mediate horizontal gene transfer of antimicrobial resistance and virulence genes. Our work provides comprehensive insights into the genetic mechanisms of highly resistant C. freundii, highlighting the importance of genomic surveillance of this opportunistic pathogen as a high-risk population for emerging resistance and pathogenicity. IMPORTANCE: Emerging pathogens exhibiting multi-, extremely, and pan-drug resistance are a major concern for hospitalized patients and the healthcare community due to limited antimicrobial treatment options and the potential for spread. Genomic technologies have enabled clinical surveillance of emerging pathogens and modeling of the evolution and transmission of antimicrobial resistance and virulence. Here, we report the genomic characterization of an extremely drug-resistant ST257 Citrobacter freundii clinical isolate. 
Genomic analysis of GMU8049 with a rare ST type and unusual phenotypes can provide information on how this extremely resistant clinical isolate has evolved, including the acquisition of blaNDM-1 via the IncX3 plasmid and accumulation through chromosomal mutations leading to a novel CMY variant and deficiency of the outer membrane porin OmpK37. Our work highlights that the emergence of extremely resistant C. freundii poses a significant challenge to the treatment of clinical infections. Therefore, great efforts must be made to specifically monitor this opportunistic pathogen.}, } @article {pmid39391977, year = {2024}, author = {Naser-Khdour, S and Scheuber, F and Fields, PD and Ebert, D}, title = {The Evolution of Extreme Genetic Variability in a Parasite-Resistance Complex.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae222}, pmid = {39391977}, issn = {1759-6653}, abstract = {Genomic regions that play a role in parasite defense are often found to be highly variable, with the MHC serving as an iconic example. Single nucleotide polymorphisms may represent only a small portion of this variability, with Indel polymorphisms and copy number variation further contributing. In extreme cases, haplotypes may no longer be recognized as orthologous. Understanding the evolution of such highly divergent regions is challenging because the most extreme variation is not visible using reference-assisted genomic approaches. Here we analyze the case of the Pasteuria Resistance Complex (PRC) in the crustacean Daphnia magna, a defense complex in the host against the common and virulent bacterium Pasteuria ramosa. Two haplotypes of this region have been previously described, with parts of it being non-homologous, and the region has been shown to be under balancing selection. 
Using pan-genome analysis and tree reconciliation methods to explore the evolution of the PRC and its characteristics within and between species of Daphnia and other Cladoceran species, our analysis revealed a remarkable diversity in this region even among host species, with many non-homologous hyper-divergent-haplotypes. The PRC is characterized by extensive duplication and losses of Fucosyltransferase (FuT) and Galactosyltransferase (GalT) genes that are believed to play a role in parasite defense. The PRC region can be traced back to common ancestors over 250 million years. The unique combination of an ancient resistance complex and a dynamic, hyper-divergent genomic environment presents a fascinating opportunity to investigate the role of such regions in the evolution and long-term maintenance of resistance polymorphisms. Our findings offer valuable insights into the evolutionary forces shaping disease resistance and adaptation, not only in the genus Daphnia, but potentially across the entire Cladocera class.}, } @article {pmid39391725, year = {2024}, author = {Ferro, E and Oliva, M and Gagie, T and Boucher, C}, title = {Building a pangenome alignment index via recursive prefix-free parsing.}, journal = {iScience}, volume = {27}, number = {10}, pages = {110933}, pmid = {39391725}, issn = {2589-0042}, abstract = {Pangenomics alignment offers a solution to reduce bias in biomedical research. Traditionally, short-read aligners like Bowtie and BWA indexed a single reference genome to find approximate alignments. These methods, limited by linear-memory requirements, can only index a few genomes. Emerging pangenome aligners, such as VG, Giraffe, and Moni, address this by indexing more genomes. VG and Giraffe use a variation graph, while Moni indexes sequences accounting for repetition using prefix-free parsing to build a dictionary and parse. The main challenge is the parse's size, which becomes significantly larger than the dictionary. 
To scale Moni, we propose removing the parse from the construction of the run-length encoded BWT (RLBWT), suffix array, and Longest Common Prefix (LCP) by applying prefix-free parsing recursively. This approach improves construction time and memory requirements, enabling efficient construction of RLBWT, suffix array, and LCP for large pangenomes, such as those from the Human Pangenome Reference Consortium.}, } @article {pmid39391331, year = {2024}, author = {Gabory, E and Mwaniki, MN and Pisanti, N and Pissis, SP and Radoszewski, J and Sweering, M and Zuba, W}, title = {Pangenome comparison via ED strings.}, journal = {Frontiers in bioinformatics}, volume = {4}, number = {}, pages = {1397036}, pmid = {39391331}, issn = {2673-7647}, abstract = {INTRODUCTION: An elastic-degenerate (ED) string is a sequence of sets of strings. It can also be seen as a directed acyclic graph whose edges are labeled by strings. The notion of ED strings was introduced as a simple alternative to variation and sequence graphs for representing a pangenome, that is, a collection of genomic sequences to be analyzed jointly or to be used as a reference.

METHODS: In this study, we define notions of matching statistics of two ED strings as similarity measures between pangenomes and, consequently infer a corresponding distance measure. We then show that both measures can be computed efficiently, in both theory and practice, by employing the intersection graph of two ED strings.

RESULTS: We also implemented our methods as a software tool for pangenome comparison and evaluated their efficiency and effectiveness using both synthetic and real datasets.

DISCUSSION: As for efficiency, we compare the runtime of the intersection graph method against the classic product automaton construction showing that the intersection graph is faster by up to one order of magnitude. For showing effectiveness, we used real SARS-CoV-2 datasets and our matching statistics similarity measure to reproduce a well-established clade classification of SARS-CoV-2, thus demonstrating that the classification obtained by our method is in accordance with the existing one.}, } @article {pmid39390673, year = {2024}, author = {Udaondo, Z and Ramos, JL and Abram, K}, title = {Unraveling the Genomic Diversity of the Pseudomonas putida Group: Exploring Taxonomy, Core Pangenome, and Antibiotic Resistance Mechanisms.}, journal = {FEMS microbiology reviews}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsre/fuae025}, pmid = {39390673}, issn = {1574-6976}, abstract = {The genus Pseudomonas is characterized by its rich genetic diversity, with over 300 species been validly recognized. This reflects significant progress made through sequencing and computational methods. Pseudomonas putida group comprises highly adaptable species that thrive in diverse environments and play various ecological roles, from promoting plant growth to being pathogenic in immunocompromised individuals. By leveraging the GRUMPS computational pipeline, we scrutinized 26363 genomes labeled as Pseudomonas in NCBI GenBank, categorizing all Pseudomonas spp. genomes into 435 distinct species-level clusters or cliques. We identified 224 strains deposited under the taxonomic identifier "Pseudomonas putida" distributed within 31 of these species-level clusters, challenging prior classifications. Nine of these 31 cliques contained at least six genomes labeled as "Pseudomonas putida" and were analyzed in depth, particularly clique_1 (P. alloputida) and clique_2 (P. putida). Pangenomic analysis of a set of 413 P. 
putida group strains revealed over 2.2 million proteins and more than 77000 distinct protein families. The core genome of these 413 strains includes 2226 protein families involved in essential biological processes. Intraspecific genetic homogeneity was observed within each clique, each possessing a distinct genomic identity. These cliques exhibit distinct core genes and diverse subgroups, reflecting adaptation to specific environments. Contrary to traditional views, nosocomial infections by P. alloputida, P. putida, and P. monteilii have been reported, with strains showing varied antibiotic resistance profiles due to diverse mechanisms. This review enhances the taxonomic understanding of key P. putida group species using advanced population genomics approaches and provides a comprehensive understanding of their genetic diversity, ecological roles, interactions, and potential applications.}, } @article {pmid39388056, year = {2024}, author = {Vaduva, P and Bertherat, J}, title = {The molecular genetics of adrenal cushing.}, journal = {Hormones (Athens, Greece)}, volume = {}, number = {}, pages = {}, pmid = {39388056}, issn = {2520-8721}, abstract = {Adrenal Cushing represents 20% of cases of endogenous hypercorticism. Unilateral cortisol-producing adenoma (CPA), a benign tumor, and adrenocortical carcinoma (ACC), a malignant tumor, are more frequent than bilateral adrenal nodular diseases (primary bilateral macronodular adrenal hyperplasia (PBMAH) and primary pigmented nodular adrenal disease (PPNAD)). In cortisol-producing adrenal tumors, the signaling pathways mainly altered are the protein kinase A and Wnt/β-catenin pathways. Studying components of these pathways and exploring syndromic and familial cases of these tumors has historically enabled identification of many of the predisposing genes. 
More recently, pangenomic sequencing revealed alterations in sporadic tumors. In ACC, mainly due to TP53 alterations causing Li-Fraumeni syndrome, germline predisposition is frequent in children, while it is rare in adults. Pathogenic variants in the DNA mismatch repair genes MLH1, MSH2, MSH6, and PMS2, which cause Lynch syndrome or alterations of IGF2 and CDKN1C (11p15 locus) in Beckwith-Wiedemann syndrome, can also cause ACC. Rarely, ACC is described in other hereditary tumor syndromes due to germline pathogenic variants in MEN1 or APC and, in very rare cases, NF1, SDH, PRKAR1A, or BRCA2. Concerning ACC somatic alterations, TP53 and genetic or epigenetic alterations at the 11p15 locus are also frequently described, as well as CTNNB1 and ZNRF3 pathogenic variants. CPAs mainly harbor somatic pathogenic variants in PRKACA and CTNNB1 and, less frequently, PRKAR1A, PRKACB, or GNAS1 pathogenic variants. Isolated PBMAH is due to ARMC5 inactivating pathogenic variants in 20 to 25% of cases and to KDM1A pathogenic variants in food-dependent Cushing. Syndromic PBMAH may be due to germline pathogenic variants in MEN1, APC, or FH, causing type 1 multiple endocrine neoplasia, familial adenomatous polyposis, or hereditary leiomyomatosis-kidney cancer syndrome, respectively. 
PRKAR1A germline pathogenic variants are the main alteration causing PPNAD (isolated or part of Carney complex).}, } @article {pmid39390080, year = {2024}, author = {Li, W}, title = {Personalizing pangenome graphs with k-mers.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41588-024-01954-w}, pmid = {39390080}, issn = {1546-1718}, } @article {pmid39386557, year = {2024}, author = {Huang, P and Charton, F and Schmelzle, JM and Darnell, SS and Prins, P and Garrison, E and Suh, GE}, title = {Pangenome-Informed Language Models for Privacy-Preserving Synthetic Genome Sequence Generation.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.09.18.612131}, pmid = {39386557}, issn = {2692-8205}, abstract = {The public availability of genome datasets, such as The Human Genome Project (HGP), The 1000 Genomes Project, The Cancer Genome Atlas, and the International HapMap Project, has significantly advanced scientific research and medical understanding. Here our goal is to share such genomic information for downstream analysis while protecting the privacy of individuals through Differential Privacy (DP). We introduce synthetic DNA data generation based on pangenomes in combination with Pretrained-Language Models (PTLMs). We introduce two novel tokenization schemes based on pangenome graphs to enhance the modeling of DNA. We evaluated these tokenization methods, and compared them with classical single nucleotide and k-mer tokenizations. We find k-mer tokenization schemes, indicating that our tokenization schemes boost the model's performance consistency with long effective context length (covering longer sequences with the same number of tokens). Additionally, we propose a method to utilize the pangenome graph and make it comply with DP privacy standards. 
We assess the performance of DP training on the quality of generated sequences with discussion of the trade-offs between privacy and model accuracy. The source code for our work will be published under a free and open source license soon.}, } @article {pmid39382798, year = {2024}, author = {Ali, R and Ali, K and Aurongzeb, M and Al-Regaiey, K and Kori, JA and Irfan, M and Rashid, Y and Al Abduljabbar, D and Kaleem, I and Bashir, S}, title = {Characterization of meningitis-causing bacteria, with focus on genomic and pangenomic study of multi-drug resistant Streptococcus pneumoniae from cerebrospinal fluid.}, journal = {Antonie van Leeuwenhoek}, volume = {118}, number = {1}, pages = {16}, pmid = {39382798}, issn = {1572-9699}, mesh = {*Streptococcus pneumoniae/genetics/drug effects/isolation & purification ; Humans ; *Drug Resistance, Multiple, Bacterial/genetics ; *Phylogeny ; *Anti-Bacterial Agents/pharmacology ; *Microbial Sensitivity Tests ; *Whole Genome Sequencing ; *Genome, Bacterial ; Cerebrospinal Fluid/microbiology ; Male ; Female ; Child, Preschool ; Genomics ; Multilocus Sequence Typing ; Meningitis, Pneumococcal/microbiology/cerebrospinal fluid ; Pakistan ; Child ; Meningitis, Bacterial/microbiology/cerebrospinal fluid ; Adult ; Infant ; }, abstract = {Streptococcus pneumoniae is a major cause of meningitis in under developed countries with low vaccination rates and high antibiotic resistance. This study aimed to analyze 83 suspected meningitis patients in Karachi for the detection of S. pneumoniae followed by its whole genome sequencing and Pan Genome analysis. Out of the 83 samples collected, 33 samples with altered physical (turbidity), cytological (white blood cell count) and biochemical (total protein and total glucose concentrations) parameters indicated potential meningitis cases, while these parameters were within normal healthy ranges in remaining 50 samples. 
Latex particle agglutination (LPA) was performed on the 33 samples, revealing 20 positive cases of bacterial meningitis. The PCR and culturing methods revealed 5 S. pneumoniae isolates. Antibiotic susceptibility tests showed that one S. pneumoniae strain was resistant to erythromycin, levofloxacin, and tetracycline. Whole-genome sequencing of this resistant strain was performed and S. pneumoniae was confirmed with MLST analysis, while it had > 2.3 Mb genome and a single repUS43 plasmid. In CARD analysis, the strain had tet(M), ermB, RlmA(II), patB, pmrA, and patA ARGs, which could provide resistance against tetracycline, macrolide, fluoroquinolone, and glycopeptide antibiotics. Phylogenetic analysis revealed that the isolate was closely related to strains from Hungary and the USA. Pan-genome analysis with 144 genome assemblies from NCBI database showed that 1101 non-redundant core genes were shared between all strains. This study gives valuable understanding into the prevalence and characterization of meningitis-causing bacteria in Karachi, Pakistan with prime focus on multi-drug resistant S. pneumoniae.}, } @article {pmid39379804, year = {2024}, author = {Chu, N and Liu, TT and Zhang, HL and Cui, D and Huang, MT and Fu, HY and Su, JB and Gao, SJ}, title = {Complete genome sequences of two Pantoea stewartii strains ATCC 8199 from maize and PSCN1 from sugarcane.}, journal = {BMC genomic data}, volume = {25}, number = {1}, pages = {86}, pmid = {39379804}, issn = {2730-6844}, support = {CARS-17//Agriculture Research System of China/ ; }, abstract = {OBJECTIVES: The pathogen of Pantoea stewartii (Ps) is the causal agent of bacterial disease in corn and various graminaceous plants. Ps has two subspecies, Pantoea stewartii subsp. stewartia (Pss) and Pantoea stewartii subsp. indologenes (Psi). This study presents two complete genomes of Ps strains including ATCC 8199 isolated from maize and PSCN1 causing bacterial wilt in sugarcane. 
The two bacterial genomes information will be helpful for taxonomy analysis in this genus Pantoea at whole-genome levels and accurately discriminated the two subspecies of Pss and Psi.

DATA DESCRIPTION: The reference strain ATCC 8199 isolated from maize was purchased from Beijing Biobw Biotechnology Co., Ltd. (China) and the strain of PSCN1 was isolated from sugarcane cultivar YZ08-1095 in Zhanjiang, Guangdong province of China. Two complete genomes were sequenced using Illumina Hiseq (second-generation) and Oxford Nanopore (third-generation) platforms. The genome of the strain ATCC 8199 comprised of 4.78 Mb with an average GC content of 54.03%, along with five plasmids, encoding a total of 4,846 gene with an average gene length of 827 bp. The genome of PSCN1 comprised of 5.03 Mb with an average GC content of 53.78%, along with two plasmids, encoding a total of 4,725 gene with an average gene length of 913 bp. The bacterial pan-genome analysis highlighted the strain ATCC 8199 was clustered into a subgroup with a Pss strain CCUG 26359 from USA, while the strain PSCN1 was clustered into another subgroup with a Ps strain NRRLB-133 from USA. These findings will serve as a useful resource for further analyses of the evolution of Ps strains and corresponding disease epidemiology worldwide.}, } @article {pmid39379381, year = {2024}, author = {Cortinovis, G and Vincenzi, L and Anderson, R and Marturano, G and Marsh, JI and Bayer, PE and Rocchetti, L and Frascarelli, G and Lanzavecchia, G and Pieri, A and Benazzo, A and Bellucci, E and Di Vittori, V and Nanni, L and Ferreira Fernández, JJ and Rossato, M and Aguilar, OM and Morrell, PL and Rodriguez, M and Gioia, T and Neumann, K and Alvarez Diaz, JC and Gratias, A and Klopp, C and Bitocchi, E and Geffroy, V and Delledonne, M and Edwards, D and Papa, R}, title = {Author Correction: Adaptive gene loss in the common bean pan-genome during range expansion and domestication.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {8715}, doi = {10.1038/s41467-024-52864-8}, pmid = {39379381}, issn = {2041-1723}, } @article {pmid39377391, year = {2024}, author = {Liu, D and Luo, C and Dai, R 
and Huang, X and Chen, X and He, L and Mao, H and Li, J and Zhang, L and Yang, QY and Mei, Z}, title = {{AMIR}: a multi-omics data platform for {Asteraceae} plants genetics and breeding research.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae833}, pmid = {39377391}, issn = {1362-4962}, support = {2022YFC3502200//National Key R&D Program of China/ ; 32322061//National Natural Science Foundation of China/ ; HBZY2023B00503//Seed Industry High-quality Development Project Of Hubei Province/ ; 2662023XXPY001//Fundamental Research Funds for the Central University HZAU/ ; }, abstract = {As the largest family of dicotyledon, the Asteraceae family comprises a variety of economically important crops, ornamental plants and numerous medicinal herbs. Advancements in genomics and transcriptomic have revolutionized research in Asteraceae species, generating extensive omics data that necessitate an efficient platform for data integration and analysis. However, existing databases face challenges in mining genes with specific functions and supporting cross-species studies. To address these gaps, we introduce the Asteraceae Multi-omics Information Resource (AMIR; https://yanglab.hzau.edu.cn/AMIR/), a multi-omics hub for the Asteraceae plant community. AMIR integrates diverse omics data from 74 species, encompassing 132 genomes, 4 408 432 genes annotated across seven different perspectives, 3897 transcriptome sequencing samples spanning 131 organs, tissues and stimuli, 42 765 290 unique variants and 15 662 metabolites genes. Leveraging these data, AMIR establishes the first pan-genome, comparative genomics and transcriptome system for the Asteraceae family. Furthermore, AMIR offers user-friendly tools designed to facilitate extensive customized bioinformatics analyses. Two case studies demonstrate AMIR's capability to provide rapid, reproducible and reliable analysis results. 
In summary, by integrating multi-omics data of Asteraceae species and developing powerful analytical tools, AMIR significantly advances functional genomics research and contributes to breeding practices of Asteraceae.}, } @article {pmid39377031, year = {2024}, author = {Zhang, X and Zhou, Y and Fu, L and Zhou, L and Cheng, X and Zhang, W and Tan, Z}, title = {{WGS} Analysis of {Staphylococcus} warneri Outbreak in a Neonatal Intensive Care Unit.}, journal = {Infection and drug resistance}, volume = {17}, number = {}, pages = {4279--4289}, pmid = {39377031}, issn = {1178-6973}, abstract = {PURPOSE: Staphylococcus warneri is an opportunistic pathogen responsible for hospital-acquired infections (HAIs). The aim of this study was to describe an outbreak caused by S. warneri infection in a neonatal intensive care unit (NICU) and provide investigation, prevention and control strategies for this outbreak.

METHODS: We conducted an epidemiological investigation of the NICU S. warneri outbreak, involving seven neonates, staff, and environmental screening, to identify the source of infection. WGS analyses were performed on S. warneri isolates, including species identification, core genome single-nucleotide polymorphism (cgSNP) analysis, pan-genome analysis, and genetic characterization assessment of the prevalence of specific antibiotic resistance and virulence genes.

RESULTS: Eight S. warneri strains were isolated from this outbreak, with seven from neonates and one from environment. Six clinical cases within three days in 2021 were linked to one strain isolated from environmental samples; isolates varied by 0-69 SNPs and were confirmed to be from an outbreak through WGS. Multiple infection prevention measures were implemented, including comprehensive environmental disinfection and stringent protocols, and all affected neonates were transferred to the isolation wards. Following these interventions, no further cases of S. warneri infections were observed. Furthermore, pan-genome analysis results suggested that in human S. warneri may exhibit host specificity.

CONCLUSION: The investigation has revealed that the outbreak was linked to the milk preparation workbench by the WGS. It is recommended that there be a stronger focus on environmental disinfection management in order to raise awareness, improve identification, and prevention of healthcare-associated infections that are associated with the hospital environment.}, } @article {pmid39376581, year = {2024}, author = {Du, Y and Qian, C and Li, X and Zheng, X and Huang, S and Yin, Z and Chen, T and Pan, L}, title = {Unveiling intraspecific diversity and evolutionary dynamics of the foodborne pathogen {Bacillus} paranthracis through high-quality pan-genome analysis.}, journal = {Current research in food science}, volume = {9}, number = {}, pages = {100867}, pmid = {39376581}, issn = {2665-9271}, abstract = {Understanding the evolutionary dynamics of foodborne pathogens throughout host-associated habitats is of utmost importance. Bacterial pan-genomes, as dynamic entities, are strongly influenced by ecological lifestyles. As a phenotypically diverse species in the Bacillus cereus group, Bacillus paranthracis is recognized as an emerging foodborne pathogen and a probiotic simultaneously. This poorly understood species is a suitable study model for adaptive pan-genome evolution. In this study, we determined the biogeographic distribution, abundance, genetic diversity, and genotypic profiles of key genetic elements of B. paranthracis. Metagenomic read recruitment analyses demonstrated that B. paranthracis members are globally distributed and abundant in host-associated habitats. A high-quality pan-genome of B. paranthracis was subsequently constructed to analyze the evolutionary dynamics involved in ecological adaptation comprehensively. The open pan-genome indicated a flexible gene repertoire with extensive genetic diversity. 
Significant divergences in the phylogenetic relationships, functional enrichment, and degree of selective pressure between the different components demonstrated different evolutionary dynamics between the core and accessory genomes driven by ecological forces. Purifying selection and gene loss are the main signatures of evolutionary dynamics in B. paranthracis pan-genome. The plasticity of the accessory genome is characterized by horizontal gene transfer (HGT), massive gene losses, and weak purifying or positive selection, which might contribute to niche-specific adaptation. In contrast, although the core genome dominantly undergoes purifying selection, its association with HGT and positively selected mutations indicates its potential role in ecological diversification. Furthermore, host fitness-related dynamics are characterized by the loss of secondary metabolite biosynthesis gene clusters (BGCs) and CAZyme-encoding genes and the acquisition of antimicrobial resistance (AMR) and virulence genes via HGT. 
This study offers a case study of pan-genome evolution to investigate the ecological adaptations reflected by biogeographical characteristics, thereby advancing the understanding of intraspecific diversity and evolutionary dynamics of foodborne pathogens.}, } @article {pmid39372902, year = {2024}, author = {Moens, C and Bogaerts, B and Lorente-Leal, V and Vanneste, K and De Keersmaecker, SCJ and Roosens, NHC and Mostin, L and Fretin, D and Marché, S}, title = {Genomic comparison between {Mycobacterium} bovis and {Mycobacterium} microti and in silico analysis of peptide-based biomarkers for serodiagnosis.}, journal = {Frontiers in veterinary science}, volume = {11}, number = {}, pages = {1446930}, pmid = {39372902}, issn = {2297-1769}, abstract = {In recent years, there has been an increase in the number of reported cases of Mycobacterium microti infection in various animals, which can interfere with the ante-mortem diagnosis of animal tuberculosis caused by Mycobacterium bovis. In this study, whole genome sequencing (WGS) was used to search for protein-coding genes to distinguish M. microti from M. bovis. In addition, the population structure of the available M. microti genomic WGS datasets is described, including three novel Belgian isolates from infections in alpacas. Candidate genes were identified by examining the presence of the regions of difference and by a pan-genome analysis of the available WGS data. A total of 80 genes showed presence-absence variation between the two species, including genes encoding Proline-Glutamate (PE), Proline-Proline-Glutamate (PPE), and Polymorphic GC-Rich Sequence (PE-PGRS) proteins involved in virulence and host interaction. Filtering based on predicted subcellular localization, sequence homology and predicted antigenicity resulted in 28 proteins out of 80 that were predicted to be potential antigens. 
As synthetic peptides are less costly and variable than recombinant proteins, an in silico approach was performed to identify linear and discontinuous B-cell epitopes in the selected proteins. From the 28 proteins, 157 B-cell epitope-based peptides were identified that discriminated between M. bovis and M. microti species. Although confirmation by in vitro testing is still required, these candidate synthetic peptides containing B-cell epitopes could potentially be used in serological tests to differentiate cases of M. bovis from M. microti infection, thus reducing misdiagnosis in animal tuberculosis surveillance.}, } @article {pmid39372800, year = {2024}, author = {Ford, MKB and Hari, A and Zhou, Q and Numanagić, I and Sahinalp, SC}, title = {Biologically-informed Killer cell immunoglobulin-like receptor ({KIR}) gene annotation tool.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.08.13.607835}, pmid = {39372800}, issn = {2692-8205}, abstract = {Natural killer (NK) cells are essential components of the innate immune system, with their activity significantly regulated by Killer cell Immunoglobulin-like Receptors (KIRs). The diversity and structural complexity of KIR genes present significant challenges for accurate genotyping, essential for understanding NK cell functions and their implications in health and disease. Traditional genotyping methods struggle with the variable nature of KIR genes, leading to inaccuracies that can impede immunogenetic research. These challenges extend to high-quality phased assemblies, which have been recently popularized by the Human Pangenome Consortium. This paper introduces BAKIR (Biologically-informed Annotator for KIR locus), a tailored computational tool designed to overcome the challenges of KIR genotyping and annotation on high-quality, phased genome assemblies. 
BAKIR aims to enhance the accuracy of KIR gene annotations by structuring its annotation pipeline around identifying key functional mutations, thereby improving the identification and subsequent relevance of gene and allele calls. It uses a multi-stage mapping, alignment, and variant calling process to ensure high-precision gene and allele identification, while also maintaining high recall for sequences that are significantly mutated or truncated relative to the known allele database. BAKIR has been evaluated on a subset of the HPRC assemblies, where BAKIR was able to improve many of the associated annotations and call novel variants. BAKIR is freely available on GitHub, offering ease of access and use through multiple installation methods, including pip, conda, and singularity container, and is equipped with a user-friendly command-line interface, thereby promoting its adoption in the scientific community.}, } @article {pmid39372794, year = {2024}, author = {Logsdon, GA and Ebert, P and Audano, PA and Loftus, M and Porubsky, D and Ebler, J and Yilmaz, F and Hallast, P and Prodanov, T and Yoo, D and Paisie, CA and Harvey, WT and Zhao, X and Martino, GV and Henglin, M and Munson, KM and Rabbani, K and Chin, CS and Gu, B and Ashraf, H and Austine-Orimoloye, O and Balachandran, P and Bonder, MJ and Cheng, H and Chong, Z and Crabtree, J and Gerstein, M and Guethlein, LA and Hasenfeld, P and Hickey, G and Hoekzema, K and Hunt, SE and Jensen, M and Jiang, Y and Koren, S and Kwon, Y and Li, C and Li, H and Li, J and Norman, PJ and Oshima, KK and Paten, B and Phillippy, AM and Pollock, NR and Rausch, T and Rautiainen, M and Scholz, S and Song, Y and Soylev, A and Sulovari, A and Surapaneni, L and Tsapalou, V and Zhou, W and Zhou, Y and Zhu, Q and Zody, MC and Mills, RE and Devine, SE and Shi, X and Talkowski, ME and Chaisson, MJP and Dilthey, AT and Konkel, MK and Korbel, JO and Lee, C and Beck, CR and Eichler, EE and Marschall, T}, title = {Complex genetic variation in 
nearly complete human genomes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.09.24.614721}, pmid = {39372794}, issn = {2692-8205}, abstract = {Diverse sets of complete human genomes are required to construct a pangenome reference and to understand the extent of complex structural variation. Here, we sequence 65 diverse human genomes and build 130 haplotype-resolved assemblies (130 Mbp median continuity), closing 92% of all previous assembly gaps and reaching telomere-to-telomere (T2T) status for 39% of the chromosomes. We highlight complete sequence continuity of complex loci, including the major histocompatibility complex (MHC), SMN1/SMN2, NBPF8, and AMY1/AMY2, and fully resolve 1,852 complex structural variants (SVs). In addition, we completely assemble and validate 1,246 human centromeres. We find up to 30-fold variation in α-satellite high-order repeat (HOR) array length and characterize the pattern of mobile element insertions into α-satellite HOR arrays. While most centromeres predict a single site of kinetochore attachment, epigenetic analysis suggests the presence of two hypomethylated regions for 7% of centromeres. Combining our data with the draft pangenome reference significantly enhances genotyping accuracy from short-read data, enabling whole-genome inference to a median quality value (QV) of 45. 
Using this approach, 26,115 SVs per sample are detected, substantially increasing the number of SVs now amenable to downstream disease association studies.}, } @article {pmid39372495, year = {2024}, author = {Karthik, K and Anbazhagan, S and Priyadharshini, MLM and Sharma, RK and Manoharan, S}, title = {Comparative genomics of zoonotic pathogen {Clostridioides} difficile of animal origin to understand its diversity.}, journal = {3 Biotech}, volume = {14}, number = {11}, pages = {257}, pmid = {39372495}, issn = {2190-572X}, abstract = {UNLABELLED: Clostridioides difficile, a zoonotic pathogen causing enteric diseases in different animals and humans. A comprehensive study on the presence of toxin genes and antimicrobial resistance genes based on genome data of C. difficile in animals is scanty. In the present study, a total of 15 C. difficile isolates were recovered from dogs and isolates with toxin genes (D1, CD15 and CD26) along with two other non-toxigenic strains (CD28, CD32) were used for whole genome sequencing and comparative genomics. Sequence type-based clustering was noted in the whole genome phylogeny with 4 known multi-locus sequence typing (MLST) clades namely I, II, IV, and V and a cryptic clade. ST11 and ST54 were reported for the 2nd time worldwide in dogs. Out of 109 genomes used in the study, 29 genomes were predicted with all four toxin genes (toxA, toxB, cdtA, cdtB) while 22 did not have any of the toxin genes. ST11 of MLST clade V had the maximum number of 46 genomes predicted with at least one toxin gene. Among the genomes sequenced in this study, CD26 had a maximum of 5 AMR genes (aac(6')-aph(2″), ant(6)-Ia, catP, erm(B)_18, and tet(M)_11) and CD15 was predicted with 2 AMR genes (aac(6')-aph(2″), erm(B)_18). Tetracycline resistance genes were predicted most in the ST11 genome. Of the 22 non-toxigenic strains, 9 genomes (ST48 = 5, ST3 = 2, ST109 = 1, ST15 = 1) were predicted with a minimum of one AMR gene. 
Pangenome analysis indicated that the Bpan value is 0.12 showing that C. difficile has an open pangenome structure. This indicates that the organism can evolve by the addition of new genes. This study reports the circulation of clinically important ST11 and multidrug-resistant non-toxigenic strains among animals.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-024-04102-7.}, } @article {pmid39367490, year = {2024}, author = {Liu, J and Shi, Y and Mo, D and Luo, L and Xu, S and Lv, F}, title = {The goat pan-genome reveals patterns of gene loss during domestication.}, journal = {Journal of animal science and biotechnology}, volume = {15}, number = {1}, pages = {132}, pmid = {39367490}, issn = {1674-9782}, abstract = {BACKGROUND: Unveiling genetic diversity features and understanding the genetic mechanisms of diverse goat phenotypes are pivotal in facilitating the preservation and utilization of these genetic resources. However, the total genetic diversity within a species can't be captured by the reference genome of a single individual. The pan-genome is a collection of all the DNA sequences that occur in a species, and it is expected to capture the total genomic diversity of the specific species.

RESULTS: We constructed a goat pan-genome using map-to-pan assemble based on 813 individuals, including 723 domestic goats and 90 samples from their wild relatives, which presented a broad regional and global representation. In total, 146 Mb sequences and 974 genes were identified as absent from the reference genome (ARS1.2; GCF_001704415.2). We identified 3,190 novel single nucleotide polymorphisms (SNPs) using the pan-genome analysis. These novel SNPs could properly reveal the population structure of domestic goats and their wild relatives. Presence/absence variation (PAV) analysis revealed gene loss and intense negative selection during domestication and improvement.

CONCLUSIONS: Our research highlights the importance of the goat pan-genome in capturing the missing genetic variations. It reveals the changes in genomic architecture during goat domestication and improvement, such as gene loss. This improves our understanding of the evolutionary and breeding history of goats.}, } @article {pmid39367302, year = {2024}, author = {Mejía-Limones, I and Andrade-Molina, D and Morey-León, G and Hidalgo-Olmedo, JC and Chang-Asinc, JG and Fernández-Cadena, JC and Rojas, M}, title = {Whole-genome sequencing of {Klebsiella} pneumoniae {MDR} circulating in a pediatric hospital setting: a comprehensive genome analysis of isolates from {Guayaquil}, {Ecuador}.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {928}, pmid = {39367302}, issn = {1471-2164}, mesh = {Humans ; Ecuador ; *Whole Genome Sequencing ; *Klebsiella pneumoniae/genetics/isolation & purification/drug effects ; *Drug Resistance, Multiple, Bacterial/genetics ; *Phylogeny ; *Hospitals, Pediatric ; Child ; *Genome, Bacterial ; Klebsiella Infections/microbiology/epidemiology ; Virulence Factors/genetics ; Multilocus Sequence Typing ; Child, Preschool ; Infant ; Genetic Variation ; }, abstract = {BACKGROUND: Klebsiella pneumoniae is the major cause of nosocomial infections worldwide and is related to a worsening increase in Multidrug-Resistant Bacteria (MDR) and virulence genes that seriously affect immunosuppressed patients, long-stay intensive care patients, elderly individuals, and children. Whole-Genome Sequencing (WGS) has resulted in a useful strategy for characterizing the genomic components of clinically important bacteria, such as K. pneumoniae, enabling them to monitor genetic changes and understand transmission, highlighting the risk of dissemination of resistance and virulence associated genes in hospitals. In this study, we report on WGS 14 clinical isolates of K. pneumoniae from a pediatric hospital biobank of Guayaquil, Ecuador.

RESULTS: The main findings revealed pronounced genetic heterogeneity among the isolates. Multilocus sequencing type ST45 was the predominant lineage among non-KPC isolates, whereas ST629 was found more frequently among KPC isolates. Phylogenetic analysis suggested local transmission dynamics. Comparative genomic analysis revealed a core set of 3511 conserved genes and an open pangenome in neonatal isolates. The diversity of MLSTs and capsular types, and the high genetic diversity among these isolates indicate high intraspecific variability. In terms of virulence factors, we identified genes associated with adherence, biofilm formation, immune evasion, secretion systems, multidrug efflux pump transporters, and a notably high number of genes related to iron uptake. A large number of these genes were detected in the ST45 isolate, whereas iron uptake yersiniabactin genes were found exclusively in the non-KPC isolates. We observed high resistance to commonly used antibiotics and determined that these isolates exhibited multidrug resistance including β-lactams, aminoglycosides, fluoroquinolones, quinolones, trimetropins, fosfomycin and macrolides; additionally, resistance-associated point mutations and cross-resistance genes were identified in all the isolates. We also report the first K. pneumoniae KPC-3 gene producers in Ecuador.

CONCLUSIONS: Our WGS results for clinical isolates highlight the importance of MDR in neonatal K. pneumoniae infections and their genetic diversity. WGS will be an imperative strategy for the surveillance of K. pneumoniae in Ecuador, and will contribute to identifying effective treatment strategies for K. pneumoniae infections in critical units in patients at stratified risk.}, } @article {pmid39365807, year = {2024}, author = {Nagy, N and Hodor, P}, title = {Chromosomal gene order defines several structural classes of {Staphylococcus} epidermidis genomes.}, journal = {PloS one}, volume = {19}, number = {10}, pages = {e0311520}, doi = {10.1371/journal.pone.0311520}, pmid = {39365807}, issn = {1932-6203}, mesh = {*Staphylococcus epidermidis/genetics/classification ; *Genome, Bacterial ; Gene Order ; Chromosomes, Bacterial/genetics ; }, abstract = {The original methodology for describing the pangenome of a prokaryotic species is based on modeling genomes as unordered sets of genes. More recent findings have underlined the importance of considering the ordering of genes along the genetic material as well, when making comparisons among genomes. To further investigate the benefits of gene order when describing genomes of a given species, we applied two distance metrics on a dataset of 84 genomes of Staphylococcus epidermidis. The first metric, GeLev, depends on the order of genes and is a derivative of the Levenshtein distance. The second, the Jaccard distance, depends on gene sets only. The application of these distances reveals information about the global structure of the genomes, and allows clustering of the genomes into classes. The main biological result is that, while genomes within the same class are structurally similar, genomes of different classes have an additional characteristic. Between genomes in different classes we can discover instances where a large segment of the first genome appears in reverse order in the second. 
This feature suggests that genome rearrangements in S. epidermidis happen on a large scale, while micro-rearrangements of single or a small number of genes are rare. Thus, this paper describes a straight-forward method to classify genomes into structural classes with the same order of genes and makes it possible to visualize reversed segments in pairs of genomes. The method can be readily applied to other species.}, } @article {pmid39365060, year = {2024}, author = {Neal, M and Brakewood, W and Betenbaugh, M and Zengler, K}, title = {Pan-genome-scale metabolic modeling of {Bacillus} subtilis reveals functionally distinct groups.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0092324}, doi = {10.1128/msystems.00923-24}, pmid = {39365060}, issn = {2379-5077}, abstract = {UNLABELLED: Bacillus subtilis is an important industrial and environmental microorganism known to occupy many niches and produce many compounds of interest. Although it is one of the best-studied organisms, much of this focus including the reconstruction of genome-scale metabolic models has been placed on a few key laboratory strains. Here, we substantially expand these prior models to pan-genome-scale, representing 481 genomes of B. subtilis with 2,315 orthologous gene clusters, 1,874 metabolites, and 2,239 reactions. Furthermore, we incorporate data from carbon utilization experiments for eight strains to refine and validate its metabolic predictions. This comprehensive pan-genome model enables the assessment of strain-to-strain differences related to nutrient utilization, fermentation outputs, robustness, and other metabolic aspects. Using the model and phenotypic predictions, we divide B. subtilis strains into five groups with distinct patterns of behavior that correlate across these features. The pan-genome model offers deep insights into B. subtilis' metabolism as it varies across environments and provides an understanding as to how different strains have adapted to dynamic habitats.

IMPORTANCE: As the volume of genomic data and computational power have increased, so has the number of genome-scale metabolic models. These models encapsulate the totality of metabolic functions for a given organism. Bacillus subtilis strain 168 is one of the first bacteria for which a metabolic network was reconstructed. Since then, several updated reconstructions have been generated for this model microorganism. Here, we expand the metabolic model for a single strain into a pan-genome-scale model, which consists of individual models for 481 B. subtilis strains. By evaluating differences between these strains, we identified five distinct groups of strains, allowing for the rapid classification of any particular strain. Furthermore, this classification into five groups aids the rapid identification of suitable strains for any application.}, } @article {pmid39364168, year = {2024}, author = {Ajesh, BR and Sariga, R and Nakkeeran, S and Renukadevi, P and Saranya, N and Alkahtani, S}, title = {Insights on mining the pangenome of {Sphingobacterium} thalpophilum {NMS02} {S296} from the resistant banana cultivar {Pisang} lilin confirms the antifungal action against {Fusarium} oxysporum f. sp. cubense.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1443195}, pmid = {39364168}, issn = {1664-302X}, abstract = {INTRODUCTION: Fusarium wilt, caused by Fusarium oxysporum f. sp. cubense (Foc), poses a significant global threat to banana cultivation. Conventional methods of disease management are increasingly challenged, thus making it necessary to explore alternative strategies. Bacterial endophytes, particularly from resistant genotypes, are gaining attention as potential biocontrol agents. Sphingobacterium thalpophilum, isolated from the resistant banana cultivar Pisang lilin (JALHSB010000001-JALHSB010000029), presents an intriguing prospect for combating Fusarium wilt. However, its underlying biocontrol mechanisms remain poorly understood. 
This study aimed to elucidate the antifungal efficacy of S. thalpophilum NMS02 S296 against Foc and explore its biocontrol mechanisms at the genomic level.

METHODS: Whole genome sequencing of S. thalpophilum NMS02 S296 was conducted using next-generation sequencing technologies and bioinformatics analyses were performed to identify genes associated with antifungal properties. In vitro assays were used to assess the inhibitory effects of the bacterial isolate on the mycelial growth of Foc. To explore the biomolecules responsible for the observed antagonistic activity, metabolites diffused into the agar at the zone of inhibition between Foc S16 and S. thalpophilum NMS02 S296 were extracted and identified.

RESULTS: Whole genome sequencing revealed an array of genes encoding antifungal enzymes and secondary metabolites in S. thalpophilum NMS02 S296. In vitro experiments demonstrated significant inhibition of Foc mycelial growth by the bacterial endophyte. Comparative genomic analysis highlighted unique genomic features in S. thalpophilum linked to its biocontrol potential, setting it apart from other bacterial species.

DISCUSSION: The study underscores the remarkable antifungal efficacy of S. thalpophilum NMS02 S296 against Fusarium wilt. The genetic basis for its biocontrol potential was elucidated through whole genome sequencing, shedding light on the mechanisms behind its antifungal activity. This study advanced our understanding of bacterial endophytes as biocontrol agents and offers a promising avenue for plant growth promotion towards sustainable strategies to mitigate Fusarium wilt in banana cultivation.}, } @article {pmid39361595, year = {2024}, author = {Vogel, NA and Rubin, JD and Pedersen, AG and Sackett, PW and Pedersen, MW and Renaud, G}, title = {soibean: High-resolution Taxonomic Identification of Ancient Environmental {DNA} Using Mitochondrial Pangenome Graphs.}, journal = {Molecular biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/molbev/msae203}, pmid = {39361595}, issn = {1537-1719}, abstract = {Ancient environmental DNA (aeDNA) is becoming a powerful tool to gain insights about past ecosystems, overcoming the limitations of conventional fossil records. However, several methodological challenges remain, particularly for classifying the DNA to species level and conducting phylogenetic analysis. Current methods, primarily tailored for modern datasets, fail to capture several idiosyncrasies of aeDNA, including species mixtures from closely related species and ancestral divergence. We introduce soibean, a novel tool that utilises mitochondrial pangenomic graphs for identifying species from aeDNA reads. It outperforms existing methods in accurately identifying species from multiple closely related sources within a sample, enhancing phylogenetic analysis for aeDNA. soibean employs a damage-aware likelihood model for precise identification at low coverage with a high damage rate. Additionally, we reconstructed ancestral sequences for soibean's database to handle aeDNA that is highly diverged from modern references. 
soibean demonstrates effectiveness through simulated data tests and empirical validation. Notably, our method uncovered new empirical results in published datasets, including using porpoise whales as food in a Mesolithic community in Sweden, demonstrating its potential to reveal previously unrecognised findings in aeDNA studies.}, } @article {pmid39353429, year = {2024}, author = {Shoer, S and Reicher, L and Zhao, C and Pollard, KS and Pilpel, Y and Segal, E}, title = {Pangenomes of human gut microbiota uncover links between genetic diversity and stress response.}, journal = {Cell host & microbe}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.chom.2024.08.017}, pmid = {39353429}, issn = {1934-6069}, abstract = {The genetic diversity of the gut microbiota has a central role in host health. Here, we created pangenomes for 728 human gut prokaryotic species, quadrupling the genes of strain-specific genomes. Each of these species has a core set of a thousand genes, differing even between closely related species, and an accessory set of genes unique to the different strains. Functional analysis shows high strain variability associates with sporulation, whereas low variability is linked with antibiotic resistance. We further map the antibiotic resistome across the human gut population and find 237 cases of extreme resistance even to last-resort antibiotics, with a predominance among Enterobacteriaceae. Lastly, the presence of specific genes in the microbiota relates to host age and sex. Our study underscores the genetic complexity of the human gut microbiota, emphasizing its significant implications for host health. 
The pangenomes and antibiotic resistance map constitute a valuable resource for further research.}, } @article {pmid39353372, year = {2024}, author = {Li, Q and Yang, J and Wang, M and Jia, R and Chen, S and Liu, M and Zhu, D and Zhao, X and Wu, Y and Yang, Q and Huang, J and Ou, X and Sun, D and Tian, B and He, Y and Wu, Z and Cheng, A and Zhang, S}, title = {Global distribution and genomic characteristics analysis of avian-derived mcr-1-positive Escherichia coli.}, journal = {Ecotoxicology and environmental safety}, volume = {285}, number = {}, pages = {117109}, doi = {10.1016/j.ecoenv.2024.117109}, pmid = {39353372}, issn = {1090-2414}, abstract = {The prevalence of avian-derived Escherichia coli (E. coli) carrying mcr-1 poses a significant threat to the development of the poultry industry and public health safety. Despite ongoing in-depth epidemiological research worldwide, a comprehensive macroscopic study based on genomics is still lacking. In response, this study collected 1104 genomic sequences of avian-derived mcr-1-positive E. coli (MCRPEC) from the NCBI public database, covering 31 countries. The majority of sequences originated from China (48.82 %), followed by the Netherlands (10.41 %). In terms of avian hosts, chicken accounted for the largest proportion (44.11 %), followed by gallus (24.09 %). Avian-derived MCRPEC also serves as a reservoir for other antibiotic resistance genes (ARGs), with 179 ARGs coexisting with mcr-1 identified. A total of 206 virulence-associated genes were also identified, revealing the pathogenic risks of MCRPEC. Pan-genome analysis revealed that avian-derived MCRPEC from different hosts, countries of origin, and serotypes exhibit minor SNP differences, indicating a high risk of cross-regional and cross-host transmission. The ST types of MCRPEC are diverse, with ST10 being the most prevalent (n=70). 
Spearman analysis showed a significant correlation between the number of ARGs and the insertion sequences (ISs) as well as plasmid replicon in ST10 strains. Furthermore, ST10 strains share a similar genetic basis with human-derived MCRPEC, suggesting the possibility of clonal dissemination. Pan-genome-wide association studies (pan-GWAS) indicated that the differential genes of MCRPEC from different countries and host sources are significantly different, mainly related to genes encoding type IV secretion systems and mobile genetic elements (MGEs). Plasmid mapping showed that the prevalent plasmid types vary by country and host, with IncI2 and IncX4 being the main mcr-1-positive plasmids. Among the 12 identified mcr-1 genetic contexts with ISs, the Tn6330 transposon was the predominant carrier of mcr-1. In summary, the potential threat of avian-derived MCRPEC cannot be ignored, and long-term and comprehensive monitoring are essential.}, } @article {pmid39352766, year = {2024}, author = {Ling, X and Gu, X and Shen, Y and Fu, C and Zhou, Y and Yin, Y and Gao, Y and Zhu, Y and Lou, Y and Zheng, M}, title = {Comparative genomic analysis of Acanthamoeba from different sources and horizontal transfer events of antimicrobial resistance genes.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0054824}, doi = {10.1128/msphere.00548-24}, pmid = {39352766}, issn = {2379-5042}, abstract = {UNLABELLED: Acanthamoeba species are among the most common free-living amoeba and ubiquitous protozoa, mainly distributed in water and soil, and cause Acanthamoeba keratitis (AK) and severe visual impairment in patients. Although several studies have reported genomic characteristics of Acanthamoeba, limited sample sizes and sources have resulted in an incomplete understanding of the genetic diversity of Acanthamoeba from different sources. 
While endosymbionts exert a significant influence on the phenotypes of Acanthamoeba, including pathogenicity, virulence, and drug resistance, the species diversity and functional characterization remain largely unexplored. Herein, our study sequenced and analyzed the whole genomes of 19 Acanthamoeba pathogenic strains that cause AK, and by integrating publicly available genomes, we sampled 29 Acanthamoeba strains from ocular, environmental, and other sources. Combined pan-genomic and comparative functional analyses revealed genetic differences and evolutionary relationships among the different sources of Acanthamoeba, as well as classification into multiple functional groups, with ocular isolates in particular showing significant differences that may account for differences in pathogenicity. Phylogenetic and rhizome gene mosaic analyses of ocular Acanthamoeba strains suggested that genomic exchanges between Acanthamoeba and endosymbionts, particularly potential antimicrobial resistance genes trafficking including the adeF, amrA, and amrB genes exchange events, potentially contribute to Acanthamoeba drug resistance. In conclusion, this study elucidated the adaptation of Acanthamoeba to different ecological niches and the influence of gene exchange on the evolution of ocular Acanthamoeba genome, guiding the clinical diagnosis and treatment of AK and laying a theoretical groundwork for developing novel therapeutic approaches.

IMPORTANCE: Acanthamoeba causes a serious blinding keratopathy, Acanthamoeba keratitis, which is currently under-recognized by clinicians. In this study, we analyzed 48 strains of Acanthamoeba using a whole-genome approach, revealing differences in pathogenicity and function between strains of different origins. Horizontal transfer events of antimicrobial resistance genes can help provide guidance as potential biomarkers for the treatment of specific Acanthamoeba keratitis cases.}, } @article {pmid39358614, year = {2024}, author = {Che, J and Lai, C and Lai, G and Chen, B and He, L and Liu, B}, title = {Complete genome sequence analysis and Pks genes identification of Brevibacillus brevis FJAT-0809-GLX with a broad inhibitory spectrum against phytopathogens.}, journal = {World journal of microbiology & biotechnology}, volume = {40}, number = {11}, pages = {332}, pmid = {39358614}, issn = {1573-0972}, abstract = {Brevibacillus brevis FJAT-0809-GLX has a broad spectrum of antimicrobial activity. Understanding the molecular basis of biocontrol ability of B. brevis will allow us to develop effective microbial agents for sustainable agriculture. In this study, we present the complete and annotated genome sequence of FJAT-0809-GLX. The complete genome size of B. brevis FJAT-0809-GLX was 6,137,019 bp, with 5688 predicted coding sequences (CDS). The average GC content was 47.38%, and there were 44 copies of the rRNAs operon (16S, 23S and 5S RNA), and 127 tRNA genes. A total of 11,162 genes were functionally annotated with the COG, GO, and KEGG databases, and 123 genes belonged to CAZymes. Genomic secondary metabolite analysis indicated 13 clusters encoding potential new antimicrobials. FJAT-0809-GLX was designated as B. brevis according to average nucleotide identity (ANI) and phylogenetic analysis. The pangenome consisted of 7141 homologous genes, and 4469 homologous genes shared by B. brevis FJAT-0809-GLX, B. brevis NBRC100599, B. brevis DSM30, and B. brevis NCTC2611. 
The number of unique homologous genes of B. brevis FJAT-0809-GLX (419 genes) and B. brevis NBRC100599 (480 genes) were much more than those in B. brevis DSM30 (13 genes), and B. brevis NCTC2611 (6 genes). Nine gene clusters encoding for secondary metabolite biosynthesis were compared in the genome of B. brevis FJAT-0809-GLX with those of B. brevis NBRC100599, B. brevis DSM30 and B. brevis NCTC2611, and the gene clusters encoding for lantipeptide and transatpks-otherks only existed in genome of B. brevis FJAT-0809-GLX. The 11 BbPks genes were included in the B. brevis FJAT-0809-GLX genome, which contained the conserved PS-DH domain. The relative expression of BbPksL, BbPksM2, BbPksM3, BbPksN3, BbPksN4 and BbPksN5 reached a maximum at 120 h and then decreased at 144 h. Our results provided detailed genomic and Pks genes information for the FJAT-0809-GLX strain, and laid a foundation for studying its biocontrol mechanisms.}, } @article {pmid39354948, year = {2024}, author = {Tong, Z and Huang, Y and Zhu, QH and Fan, L and Xiao, B and Shen, E}, title = {Retrospect and prospect of Nicotiana tabacum genome sequencing.}, journal = {Frontiers in plant science}, volume = {15}, number = {}, pages = {1474658}, pmid = {39354948}, issn = {1664-462X}, abstract = {Investigating plant genomes offers crucial foundational resources for exploring various aspects of plant biology and applications, such as functional genomics and breeding practices. With the development in sequencing and assembly technology, several Nicotiana tabacum genomes have been published. In this paper, we reviewed the progress on N. tabacum genome assembly and quality, from the initial draft genomes to the recent high-quality chromosome-level assemblies. The application of long-read sequencing, optical mapping, and Hi-C technologies has significantly improved the contiguity and completeness of N. tabacum genome assemblies, with the latest assemblies having a contig N50 size over 50 Mb. 
Despite these advancements, further improvements are still required and possible, particularly on the development of pan-genome and telomere-to-telomere (T2T) genomes. These new genomes will capture the genomic diversity and variations among different N. tabacum cultivars and species, and provide a comprehensive view of the N. tabacum genome structure and gene content, so as to deepen our understanding of the N. tabacum genome and facilitate precise breeding and functional genomics.}, } @article {pmid39348459, year = {2024}, author = {Kalbfleisch, TS and Smith, ML and Ciosek, JL and Li, K and Doris, PA}, title = {Three decades of rat genomics: approaching the finish(ed) line.}, journal = {Physiological genomics}, volume = {}, number = {}, pages = {}, doi = {10.1152/physiolgenomics.00110.2024}, pmid = {39348459}, issn = {1531-2267}, support = {R01HG011252//HHS | NIH | National Human Genome Research Institute (NHGRI)/ ; }, abstract = {The rat, Rattus norvegicus, has provided an important model for investigation of a range of characteristics of biomedical importance. Here we survey the origins of this species, its introduction into laboratory research and the emergence of genetic and genomic methods that utilize this model organism. Genomic studies have yielded important progress and provided new insight into several biologically important traits. However, some studies have been impeded by the lack of a complete and accurate reference genome for this species. New sequencing and genome assembly methods applied to the rat have resulted in a new reference genome assembly, GRCr8, which is a near telomere-to-telomere assembly of high base level accuracy that incorporates several elements not captured in prior assemblies. As genome assembly methods continue to advance and production costs become a less significant obstacle, genome assemblies for multiple inbred rat strains are emerging. 
These assemblies will allow a rat pangenome assembly to be constructed which captures all the genetic variation in strains selected for their utility in research and will overcome reference bias, a limitation associated with reliance on a single reference assembly. By this means, the full utility of this model organism to genomic studies will begin to be revealed.}, } @article {pmid39345401, year = {2024}, author = {Mastoras, M and Asri, M and Brambrink, L and Hebbar, P and Kolesnikov, A and Cook, DE and Nattestad, M and Lucas, J and Won, TS and Chang, PC and Carroll, A and Paten, B and Shafin, K}, title = {Highly accurate assembly polishing with DeepPolisher.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.09.17.613505}, pmid = {39345401}, issn = {2692-8205}, abstract = {Accurate genome assemblies are essential for biological research, but even the highest quality assemblies retain errors caused by the technologies used to construct them. Base-level errors are typically fixed with an additional polishing step that uses reads aligned to the draft assembly to identify necessary edits. However, current methods struggle to find a balance between over- and under-polishing. Here, we present an encoder-only transformer model for assembly polishing called DeepPolisher, which predicts corrections to the underlying sequence using Pacbio HiFi read alignments to a diploid assembly. Our pipeline introduces a method, PHARAOH (Phasing Reads in Areas Of Homozygosity), which uses ultra-long ONT data to ensure alignments are accurately phased and to correctly introduce heterozygous edits in falsely homozygous regions. We demonstrate that the DeepPolisher pipeline can reduce assembly errors by half, with a greater than 70% reduction in indel errors. 
We have applied our DeepPolisher-based pipeline to 180 assemblies from the next Human Pangenome Reference Consortium (HPRC) data release, producing an average predicted Quality Value (QV) improvement of 3.4 (54% error reduction) for the majority of the genome.}, } @article {pmid39345150, year = {2024}, author = {Xu, A and Lu, L and Zhang, W and Song, X and Li, G and Miao, Y and Li, R and Chen, M and Liu, Q and Li, D}, title = {Microevolution of Bartonella grahamii driven by geographic and host factors.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0108924}, doi = {10.1128/msystems.01089-24}, pmid = {39345150}, issn = {2379-5077}, abstract = {UNLABELLED: Bartonella grahamii is one of the most prevalent Bartonella species in wild rodents and has been associated with human cases of neuroretinitis. The structure and distribution of genomic diversity in natural B. grahamii is largely unexplored. Here, we have applied a comprehensive population genomic and phylogenomic analysis to 172 strains of B. grahamii to unravel the genetic differences and influencing factors that shape its populations. The findings reveal a remarkable genomic diversity within the species, primarily in the form of single-nucleotide polymorphisms. The open pangenome of B. grahamii indicates a dynamic genomic evolution influenced by its ecological niche. Whole-genome data allowed us to decompose B. grahamii diversity into six phylogroups, each characterized by a unique "mosaic pattern" of hosts and biogeographic regions. This suggests a complex interplay between host specificity and biogeography. In addition, our study suggests a possible origin of European strains from Asian lineages, and host factors have a more significant impact on the genetic differentiation of B. grahamii than geographical factors. These insights contribute to understanding the evolutionary history of this pathogen and provide a foundation for future epidemiological research and public health strategies.

IMPORTANCE: Bartonella grahamii has been reported worldwide and shown to infect humans. Up to now, an effective transmission route of B. grahamii to humans has not been confirmed. The genetic evolution of B. grahamii and the relationship between B. grahamii and its host need to be further studied. The factors driving the genetic diversity of B. grahamii are still controversial. The results showed that the European isolates shared a common ancestor with the Chinese isolates. Host factors were shown to play an important role in driving the genetic diversity of B. grahamii. When host factors were fixed, geographic barriers drove B. grahamii microevolution. Our study emphasizes the importance of characterizing isolate genomes derived from hosts and geographical locations and provides a new reference for the origin of B. grahamii.}, } @article {pmid39343881, year = {2024}, author = {Zhao, Z and Zhu, Z and Jiao, Y and Zhang, G}, title = {Pan-genome analysis of GT64 gene family and expression response to Verticillium wilt in cotton.}, journal = {BMC plant biology}, volume = {24}, number = {1}, pages = {893}, pmid = {39343881}, issn = {1471-2229}, support = {CB2023A19//the National Key Laboratory of Cotton Bio-breeding and Integrated Utilization/ ; CB2023A19//the National Key Laboratory of Cotton Bio-breeding and Integrated Utilization/ ; CB2023A19//the National Key Laboratory of Cotton Bio-breeding and Integrated Utilization/ ; CB2023A19//the National Key Laboratory of Cotton Bio-breeding and Integrated Utilization/ ; 2019CB008//the Project of Innovation Team Building in Key Areas of Xinjiang Production and Construction Corps/ ; 2019CB008//the Project of Innovation Team Building in Key Areas of Xinjiang Production and Construction Corps/ ; 2019CB008//the Project of Innovation Team Building in Key Areas of Xinjiang Production and Construction Corps/ ; 2019CB008//the Project of Innovation Team Building in Key Areas of Xinjiang Production and Construction Corps/ ; }, mesh = 
{*Gossypium/genetics/microbiology ; *Plant Diseases/microbiology/genetics ; *Verticillium/physiology ; *Multigene Family ; *Phylogeny ; *Disease Resistance/genetics ; Gene Expression Regulation, Plant ; Genome, Plant ; Plant Proteins/genetics/metabolism ; Genes, Plant ; Glycosyltransferases/genetics/metabolism ; }, abstract = {BACKGROUND: The GT64 subfamily, belonging to the glycosyltransferase family, plays a critical function in plant adaptation to stress conditions and the modulation of plant growth, development, and organogenesis processes. However, a comprehensive identification and systematic analysis of GT64 in cotton are still lacking.

RESULTS: This study used bioinformatics techniques to conduct a detailed investigation on the GT64 gene family members of eight cotton species for the first time. A total of 39 GT64 genes were detected, which could be classified into five subfamilies according to the phylogenetic tree. Among them, six genes were found in upland cotton. Furthermore, we investigated the precise chromosomal positions of these genes and visually represented their gene structure details. Moreover, we forecasted cis-regulatory elements in GhGT64s and ascertained the duplication type of the GT64 in the eight cotton species. Evaluation of the Ka/Ks ratio for similar gene pairs among the eight cotton species provided insights into the selective pressures acting on these homologous genes. Additionally, we analyzed the expression profiles of the GT64 gene family. Overexpressing GhGT64_4 in tobacco improved its disease resistance. Subsequently, VIGS experiments conducted in cotton demonstrated reduced disease resistance upon silencing of the GhGT64_4, which may indicate its involvement in affecting lignin and jasmonic acid biosynthesis pathways, thus impacting cotton resistance. Weighted Gene Co-expression Network Analysis (WGCNA) revealed an early immune response against Verticillium dahliae in G. barbadense compared to G. hirsutum. Quantitative Reverse Transcription Polymerase Chain Reaction (qRT-PCR) analysis indicated that some GT64 genes might play a role under various biotic and abiotic stress conditions.

CONCLUSIONS: These discoveries enhance our knowledge of GT64 family members and lay the groundwork for future investigations into the disease resistance mechanisms of this gene in cotton.}, } @article {pmid39342108, year = {2024}, author = {Naqvi, M and Utheim, TP and Charnock, C}, title = {Whole genome sequencing and characterization of Corynebacterium isolated from the healthy and dry eye ocular surface.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {368}, pmid = {39342108}, issn = {1471-2180}, abstract = {BACKGROUND: The purpose of this study was to characterize Corynebacterium isolated from the ocular surface of dry eye disease patients and healthy controls. We aimed to investigate the pathogenic potential of these isolates in relation to ocular surface health. To this end, we performed whole genome sequencing in combination with biochemical, enzymatic, and antibiotic susceptibility tests. In addition, we employed deferred growth inhibition assays to examine how Corynebacterium isolates may impact the growth of potentially competing microorganisms including the ocular pathogens Pseudomonas aeruginosa and Staphylococcus aureus, as well as other Corynebacterium present on the eye.

RESULTS: The 23 isolates were found to belong to 8 different species of Corynebacterium with genomes ranging from 2.12 mega base pairs in a novel Corynebacterium sp. to 2.65 mega base pairs in C. bovis. Whole genome sequencing revealed the presence of a range of antimicrobial targets present in all isolates. Pangenome analysis showed the presence of 516 core genes and that the pangenome is open. Phenotypic characterization showed variously urease, lipase, mucinase, protease and DNase activity in some isolates. Attention was particularly drawn to a potentially new or novel Corynebacterium species which had the smallest genome, and which produced a range of hydrolytic enzymes. Strikingly the isolate inhibited in vitro the growth of a range of possible pathogenic bacteria as well as other Corynebacterium isolates. The majority of Corynebacterium species included in this study did not seem to possess canonical pathogenic activity.

CONCLUSIONS: This study is the first reported genomic and biochemical characterization of ocular Corynebacterium. A number of potential virulence factors were identified which may have direct relevance for ocular health and contribute to the finding of our previous report on the ocular microbiome, where it was shown that DNA libraries were often dominated by members of this genus. Particularly interesting in this regard was the observation that some Corynebacterium, particularly new or novel Corynebacterium sp. can inhibit the growth of other ocular Corynebacterium as well as known pathogens of the eye.}, } @article {pmid39340575, year = {2024}, author = {Heuberger, M and Bernasconi, Z and Said, M and Jung, E and Herren, G and Widrig, V and Šimková, H and Keller, B and Sánchez-Martín, J and Wicker, T}, title = {Analysis of a global wheat panel reveals a highly diverse introgression landscape and provides evidence for inter-homoeologue chromosomal recombination.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {137}, number = {10}, pages = {236}, pmid = {39340575}, issn = {1432-2242}, support = {310030_212428//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung/ ; 310030B_182833//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung/ ; 310030_204165//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung/ ; RYC2021-032699-I//Agencia Estatal de Investigación/ ; }, mesh = {*Triticum/genetics/microbiology ; *Quantitative Trait Loci ; *Chromosomes, Plant/genetics ; *Disease Resistance/genetics ; Plant Diseases/genetics/microbiology ; Genetic Introgression ; Chromosome Mapping ; Plant Breeding ; Recombination, Genetic ; }, abstract = {This study highlights the agronomic potential of rare introgressions, as demonstrated by a major QTL for powdery mildew resistance on chromosome 7D. It further shows evidence for inter-homoeologue recombination in wheat. 
Agriculturally important genes are often introgressed into crops from closely related donor species or landraces. The gene pool of hexaploid bread wheat (Triticum aestivum) is known to contain numerous such "alien" introgressions. Recently established high-quality reference genome sequences allow prediction of the size, frequency and identity of introgressed chromosome regions. Here, we characterise chromosomal introgressions in bread wheat using exome capture data from the WHEALBI collection. We identified 24,981 putative introgression segments of at least 2 Mb across 434 wheat accessions. Detailed study of the most frequent introgressions identified T. timopheevii or its close relatives as a frequent donor species. Importantly, 118 introgressions of at least 10 Mb were exclusive to single wheat accessions, revealing that large populations need to be studied to assess the total diversity of the wheat pangenome. In one case, a 14 Mb introgression in chromosome 7D, exclusive to cultivar Pamukale, was shown by QTL mapping to harbour a recessive powdery mildew resistance gene. We identified multiple events where distal chromosomal segments of one subgenome were duplicated in the genome and replaced the homoeologous segment in another subgenome. We propose that these examples are the results of inter-homoeologue recombination. Our study produced an extensive catalogue of the wheat introgression landscape, providing a resource for wheat breeding. 
Of note, the finding that the wheat gene pool contains numerous rare, but potentially important introgressions and chromosomal rearrangements has implications for future breeding.}, } @article {pmid39338951, year = {2024}, author = {da Silva, MERJ and Breyer, GM and da Costa, MM and Brenig, B and Azevedo, VAC and Cardoso, MRI and Siqueira, FM}, title = {Genomic Analyses of Methicillin-Susceptible and Methicillin-Resistant Staphylococcus pseudintermedius Strains Involved in Canine Infections: A Comprehensive Genotypic Characterization.}, journal = {Pathogens (Basel, Switzerland)}, volume = {13}, number = {9}, pages = {}, doi = {10.3390/pathogens13090760}, pmid = {39338951}, issn = {2076-0817}, support = {001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; }, abstract = {Staphylococcus pseudintermedius is frequently associated with several bacterial infections in dogs, highlighting a One Health concern due to the zoonotic potential. Given the clinical significance of this pathogen, we performed comprehensive genomic analyses of 28 S. pseudintermedius strains isolated from canine infections through whole-genome sequencing using Illumina HiSeq, and compared the genetic features between S. pseudintermedius methicillin-resistant (MRSP) and methicillin-susceptible (MSSP) strains. Our analyses determined that MRSP genomes are larger than MSSP strains, with significant changes in antimicrobial resistance genes and virulent markers, suggesting differences in the pathogenicity of MRSP and MSSP strains. In addition, the pangenome analysis of S. pseudintermedius from canine and human origins identified core and accessory genomes with 1847 and 3037 genes, respectively, which indicates that most of the S. pseudintermedius genome is highly variable. Furthermore, phylogenomic analysis clearly separated MRSP from MSSP strains, despite their infection sites, showing phylogenetic differences according to methicillin susceptibility. 
Altogether our findings underscore the importance of studying the evolutionary dynamics of S. pseudintermedius, which is crucial for the development of effective prevention and control strategies of resistant S. pseudintermedius infections.}, } @article {pmid39338445, year = {2024}, author = {García-Rivera, C and Molina-Pardines, C and Haro-Moreno, JM and Parra Grande, M and Rodríguez, JC and López-Pérez, M}, title = {Genomic Analysis of Antimicrobial Resistance in Pseudomonas aeruginosa from a "One Health" Perspective.}, journal = {Microorganisms}, volume = {12}, number = {9}, pages = {}, doi = {10.3390/microorganisms12091770}, pmid = {39338445}, issn = {2076-2607}, abstract = {The "One Health" approach provides a comprehensive framework for understanding antimicrobial resistance. This perspective is of particular importance in the study of Pseudomonas aeruginosa, as it is not only a pathogen that affects humans but also persists in environmental reservoirs. To assess evolutionary selection for niche-specific traits, a genomic comparison of 749 P. aeruginosa strains from three environments (clinical, aquatic, and soil) was performed. The results showed that the environment does indeed exert selective pressure on specific traits. The high percentage of persistent genome, the lack of correlation between phylogeny and origin of the isolate, and the high intrinsic resistance indicate that the species has a high potential for pathogenicity and resistance, regardless of the reservoir. The flexible genome showed an enrichment of metal resistance genes, which could act as a co-selection of antibiotic resistance genes. In the plasmids, resistance genes were found in multigenic clusters, with the presence of a mobile integron being prominent. This integron was identified in several pathogenic strains belonging to distantly related taxa with a worldwide distribution, showing the risk of rapid evolution of resistance. 
These results provide a more complete understanding of the evolution of P. aeruginosa, which could help develop new prevention strategies.}, } @article {pmid39338429, year = {2024}, author = {Hua, L and Ye, P and Li, X and Xu, H and Lin, F}, title = {Anti-Aflatoxigenic Burkholderia contaminans BC11-1 Exhibits Mycotoxin Detoxification, Phosphate Solubilization, and Cytokinin Production.}, journal = {Microorganisms}, volume = {12}, number = {9}, pages = {}, doi = {10.3390/microorganisms12091754}, pmid = {39338429}, issn = {2076-2607}, support = {2023NSFSC1261//Natural Science Foundation of Sichuan Province/ ; 1+9KJGG006//The "1+9" Key Scientific and Technological Project of Sichuan Academy of Agricultural Sciences/ ; 2024ZZCX007//Innovation funding of Sichuan Academy of Agricultural Sciences/ ; 2019QYXK002//Innovation funding of Sichuan Academy of Agricultural Sciences/ ; SKLCUSA-b202206//the Funding from State Key Laboratory for Conservation and Utilization of Subtropical Agro-Bioresources/ ; }, abstract = {The productivity and quality of agricultural crops worldwide are adversely affected by disease outbreaks and inadequate nutrient availability. Of particular concern is the potential increase in mycotoxin prevalence due to crop diseases, which poses a threat to food security. Microorganisms with multiple functions have been favored in sustainable agriculture to address such challenges. Aspergillus flavus is a prevalent aflatoxin B1 (AFB1)-producing fungus in China. Therefore, we wanted to obtain an anti-aflatoxigenic bacterium with potent mycotoxin detoxification ability and other beneficial properties. In the present study, we have isolated an anti-aflatoxigenic strain, BC11-1, of Burkholderia contaminans, from a forest rhizosphere soil sample obtained in Luzhou, Sichuan Province, China. 
We found that it possesses several beneficial properties, as follows: (1) a broad spectrum of antifungal activity but compatibility with Trichoderma species, which are themselves used as biocontrol agents, making it possible to use in a biocontrol mixture or individually with other biocontrol agents in an integrated management approach; (2) an exhibited mycotoxin detoxification capacity with a degradation ratio of 90% for aflatoxin B1 and 78% for zearalenone, suggesting its potential for remedial application; and (3) a high ability to solubilize phosphorus and produce cytokinin, highlighting its potential as a biofertilizer. Overall, the diverse properties of BC11-1 render it a beneficial bacterium with excellent potential for use in plant disease protection and mycotoxin prevention and as a biofertilizer. Lastly, a pan-genomic analysis suggests that BC11-1 may possess other undiscovered biological properties, prompting further exploration of the properties of this unique strain of B. contaminans. These findings highlight the potential of using the anti-aflatoxigenic strain BC11-1 to enhance disease protection and improve soil fertility, thus contributing to food security. 
Given its multiple beneficial properties, BC11-1 represents a valuable microbial resource as a biocontrol agent and biofertilizer.}, } @article {pmid39337676, year = {2024}, author = {Cai, K and Song, X and Yue, W and Liu, L and Ge, F and Wang, J}, title = {Identification and Functional Characterization of Abiotic Stress Tolerance-Related PLATZ Transcription Factor Family in Barley (Hordeum vulgare L.).}, journal = {International journal of molecular sciences}, volume = {25}, number = {18}, pages = {}, doi = {10.3390/ijms251810191}, pmid = {39337676}, issn = {1422-0067}, support = {2022E10012//Digital Dry Land Crops of Zhejiang Province/ ; LQ23C130004//Natural Science Foundation of Zhejiang Province/ ; 2021C02064-3-2//Zhejiang Science and Technology Major Program on Agricultural New Variety Breeding/ ; CARS-05-01A-06//China Agriculture Research System/ ; }, mesh = {*Hordeum/genetics/metabolism ; *Plant Proteins/genetics/metabolism ; *Stress, Physiological/genetics ; *Transcription Factors/genetics/metabolism ; *Gene Expression Regulation, Plant ; *Phylogeny ; Multigene Family ; Chromosomes, Plant/genetics ; }, abstract = {Plant AT-rich sequence and zinc-binding proteins (PLATZs) are a novel category of plant-specific transcription factors involved in growth, development, and abiotic stress responses. However, the PLATZ gene family has not been identified in barley. In this study, a total of 11 HvPLATZs were identified in barley, and they were unevenly distributed on five of the seven chromosomes. The phylogenetic tree, incorporating PLATZs from Arabidopsis, rice, maize, wheat, and barley, could be classified into six clusters, in which HvPLATZs are absent in Cluster VI. HvPLATZs exhibited conserved motif arrangements with a characteristic PLATZ domain. Two segmental duplication events were observed among HvPLATZs. All HvPLATZs were core genes present in 20 genotypes of the barley pan-genome. 
The HvPLATZ5 coding sequences were conserved among 20 barley genotypes, whereas HvPLATZ4/9/10 exhibited synonymous single nucleotide polymorphisms (SNPs); the remaining ones showed nonsynonymous variations. The expression of HvPLATZ2/3/8 was ubiquitous in various tissues, whereas HvPLATZ7 appeared transcriptionally silent; the remaining genes displayed tissue-specific expression. The expression of HvPLATZs was modulated by salt stress, potassium deficiency, and osmotic stress, with response patterns being time-, tissue-, and stress type-dependent. The heterologous expression of HvPLATZ3/5/6/8/9/10/11 in yeast enhanced tolerance to salt and osmotic stress, whereas the expression of HvPLATZ2 compromised tolerance. These results advance our comprehension and facilitate further functional characterization of HvPLATZs.}, } @article {pmid39336793, year = {2024}, author = {Bouras, N and Bakli, M and Dif, G and Smaoui, S and Șmuleac, L and Paşcalău, R and Menendez, E and Nouioui, I}, title = {The Phylogenomic Characterization of Planotetraspora Species and Their Cellulases for Biotechnological Applications.}, journal = {Genes}, volume = {15}, number = {9}, pages = {}, doi = {10.3390/genes15091202}, pmid = {39336793}, issn = {2073-4425}, mesh = {*Phylogeny ; *Cellulases/genetics/metabolism ; *Genome, Bacterial ; RNA, Ribosomal, 16S/genetics ; Bacterial Proteins/genetics/metabolism ; Biotechnology ; Genomics/methods ; }, abstract = {This study aims to evaluate the in silico genomic characteristics of five species of the genus Planotetraspora: P. kaengkrachanensis, P. mira, P. phitsanulokensis, P. silvatica, and P. thailandica, with a view to their application in therapeutic research. The 16S rRNA comparison indicated that these species were phylogenetically distinct. 
Pairwise comparisons of digital DNA-DNA hybridization (dDDH) and OrthoANI values between these studied type strains indicated that dDDH values were below 62.5%, while OrthoANI values were lower than 95.3%, suggesting that the five species represent distinct genomospecies. These results were consistent with the phylogenomic study based on core genes and the pangenome analysis of these five species within the genus Planotetraspora. However, the genome annotation showed some differences between these species, such as variations in the number of subsystem category distributions across whole genomes (ranging between 1979 and 2024). Additionally, the number of CAZYme (Carbohydrate-Active enZYme) genes ranged between 298 and 325, highlighting the potential of these bacteria for therapeutic research applications. The in silico physico-chemical characteristics of cellulases from Planotetraspora species were analyzed. Their 3D structure was modeled, refined, and validated. A molecular docking analysis of this cellulase protein structural model was conducted with cellobiose, cellotetraose, laminaribiose, carboxymethyl cellulose, glucose, and xylose ligand. Our study revealed significant interaction between the Planotetraspora cellulase and cellotetraose substrate, evidenced by stable binding energies. This suggests that this bacterial enzyme holds great potential for utilizing cellotetraose as a substrate in various applications. 
This study enriches our understanding of the potential applications of Planotetraspora species in therapeutic research.}, } @article {pmid39333542, year = {2024}, author = {Stocke, K and Lamont, G and Tan, J and Scott, DA}, title = {Delineation of global, absolutely essential and conditionally essential pangenomes of Porphyromonas gingivalis.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {22247}, pmid = {39333542}, issn = {2045-2322}, support = {DE026963/DE/NIDCR NIH HHS/United States ; GM125504/GM/NIGMS NIH HHS/United States ; }, mesh = {*Porphyromonas gingivalis/genetics/metabolism ; *Genome, Bacterial ; *Phylogeny ; *Genes, Essential ; Humans ; Periodontitis/microbiology ; Bacteroidaceae Infections/microbiology ; }, abstract = {Porphyromonas gingivalis is a Gram-negative, anaerobic oral pathobiont, an etiological agent of periodontitis and the most commonly studied periodontal bacterium. Multiple low passage clinical isolates were sequenced, and their genomes compared to several laboratory strains. Phylogenetic distances were mapped, a gene absence-presence matrix generated, and core (present in all genomes) and accessory (absent in one or more genomes) genes delineated. Subsequently, a second pangenome delineating the prevalence of inherently essential genes was generated. The prevalence of genes conditionally essential for surviving tobacco exposure, abscess formation and epithelial invasion was also determined, in addition to genes encoding key proteolytic enzymes containing putative signal peptides. While the absolutely essential pangenome was highly conserved, significant differences in the complete and conditionally essential pangenomes were apparent. Thus, genetic plasticity appears to lie primarily in gene sets facilitating adaptation to variant disease-related environments. Those genes that are highly pervasive in the P. 
gingivalis absolutely essential pangenome or are highly prevalent and essential for fitness in disease-relevant models, may represent particularly attractive therapeutic targets worthy of further investigation. As mutations in absolutely essential genes are expected to be lethal, the data provided herein should also facilitate improved planning for P. gingivalis gene mutation strategies.}, } @article {pmid39332601, year = {2024}, author = {Uzzal Hossain, M and Khan Tanvir, N and Naimur Rahman, ABZ and Mahmud Chowdhury, Z and Shahadat Hossain, M and Dey, S and Bhattacharjee, A and Ahammad, I and Salma Zohora, U and Hashem, A and Chandra Das, K and Ara Keya, C and Salimullah, M}, title = {From sequence to Significance: A thorough investigation of the distinctive genome features Uncovered in C. Werkmanii strain NIB003.}, journal = {Gene}, volume = {}, number = {}, pages = {148965}, doi = {10.1016/j.gene.2024.148965}, pmid = {39332601}, issn = {1879-0038}, abstract = {Citrobacter werkmanii (C. werkmanii), an opportunistic urinary bacterium that causes diarrhea, is poorly understood. Our research focuses on genetic features that are crucial to disease development, such as pathogenic interactions, antibiotic resistance, virulence genes and genetic variation. Following its morphological, biochemical, and molecular identification, the whole genome of C. werkmanii strain NIB003 was sequenced in Bangladesh for the first time. Despite having around 80% whole genome conservation, the research shows that the Bangladeshi strain forms a separate phylogenetic cluster. This emphasises the genetic variability within C. werkmanii, resulting in particular modifications at the strain level and changes in its ability to cause disease. 
The results of the genetic diversity analysis indicate that the Bangladeshi sequenced genome is more diverse than the other strains due to the existence of unique features, such as the presence of t-RNA binding domain and N-6 adenine-specific DNA methylases.}, } @article {pmid39329486, year = {2024}, author = {Nawrocki, EM and Kudva, IT and Dudley, EG}, title = {Investigating the adherence factors of Escherichia coli at the bovine recto-anal junction.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0127024}, doi = {10.1128/spectrum.01270-24}, pmid = {39329486}, issn = {2165-0497}, abstract = {UNLABELLED: Shiga toxin-producing Escherichia coli (STEC) are major foodborne pathogens that result in thousands of hospitalizations each year in the United States. Cattle, the natural reservoir, harbor STEC asymptomatically at the recto-anal junction (RAJ). The molecular mechanisms that allow STEC and non-STEC E. coli to adhere to the RAJ are not fully understood, in part because most adherence studies utilize human cell culture models. To identify a set of bovine-specific E. coli adherence factors, we used the primary RAJ squamous epithelial (RSE) cell-adherence assay to coculture RSE cells from healthy Holstein cattle with diverse E. coli strains from bovine and nonbovine sources. We hypothesized that a comparative genomic analysis of the strains would reveal factors associated with RSE adherence. After performing adherence assays with historical strains from the E. coli Reference Center (n = 62) and strains newly isolated from the RAJ (n = 15), we used the bioinformatic tool Roary to create a pangenome of this collection. We classified strains as either low or high adherence and using the Scoary program compiled a list of accessory genes correlated with the "high adherence" strains. 
While none of the correlations were statistically significant, several gene clusters were associated with the high-adherence phenotype, including two that encode uncharacterized proteins. We also demonstrated that non-STEC E. coli strains from the RAJ are more adherent than other isolates and can outcompete STEC in coculture with RSEs. Further analysis of adherence-associated gene clusters may lead to an improved understanding of the molecular mechanisms of RSE adherence and may help develop probiotics targeting STEC in cattle.

IMPORTANCE: E. coli strains that produce Shiga toxin cause foodborne illness in humans but colonize cattle asymptomatically. The molecular mechanisms that E. coli uses to adhere to cattle cells are largely unknown. Various strategies are used to control E. coli in livestock and limit the risk of outbreaks. These include vaccinating animals against common E. coli strains and supplementing their feed with probiotics to reduce the carriage of pathogens. No strategy is completely effective, and probiotics often fail to colonize the animals. We sought to clarify the genes required for E. coli adherence in cattle by quantifying the attachment to bovine cells in a diverse set of bacteria. We also isolated nonpathogenic E. coli from healthy cows and showed that a representative isolate could outcompete pathogenic strains in cocultures. We propose that the focused study of these strains and their adherence factors will better inform the design of probiotics and vaccines for livestock.}, } @article {pmid39328909, year = {2024}, author = {Ong, CT and Blackall, PJ and Boe-Hansen, GB and deWet, S and Hayes, BJ and Indjein, L and Korolik, V and Minchin, C and Nguyen, LT and Nordin, Y and Siddle, H and Turni, C and Venus, B and Westman, ME and Zhang, Z and Tabor, AE}, title = {Whole-genome comparison using complete genomes from Campylobacter fetus strains revealed single nucleotide polymorphisms on non-genomic islands for subspecies differentiation.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1452564}, pmid = {39328909}, issn = {1664-302X}, abstract = {INTRODUCTION: Bovine Genital Campylobacteriosis (BGC), caused by Campylobacter fetus subsp. venerealis, is a sexually transmitted bacterium that significantly impacts cattle reproductive performance. However, current detection methods lack consistency and reliability due to the close genetic similarity between C. fetus subsp. venerealis and C. fetus subsp. fetus. 
Therefore, this study aimed to utilize complete genome analysis to distinguish genetic features between C. fetus subsp. venerealis and other subspecies, thereby enhancing BGC detection for routine screening and epidemiological studies.

METHODS AND RESULTS: This study reported the complete genomes of four C. fetus subsp. fetus and five C. fetus subsp. venerealis, sequenced using long-read sequencing technologies. Comparative whole-genome analyses (n = 25) were conducted, incorporating an additional 16 complete C. fetus genomes from the NCBI database, to investigate the genomic differences between these two closely related C. fetus subspecies. Pan-genomic analyses revealed a core genome consisting of 1,561 genes and an accessory pangenome of 1,064 genes between the two C. fetus subspecies. However, no unique predicted genes were identified in either subspecies. Nonetheless, whole-genome single nucleotide polymorphisms (SNPs) analysis identified 289 SNPs unique to one or the other C. fetus subspecies. After the removal of SNPs located on putative genomic islands, recombination sites, and those causing synonymous amino acid changes, the remaining 184 SNPs were functionally annotated. Candidate SNPs that were annotated with the KEGG "Peptidoglycan Biosynthesis" pathway were recruited for further analysis due to their potential association with the glycine intolerance characteristic of C. fetus subsp. venerealis and its biovar variant. Verification with 58 annotated C. fetus genomes, both complete and incomplete, from RefSeq, successfully classified these seven SNPs into two groups, aligning with their phenotypic identification as CFF (Campylobacter fetus subsp. fetus) or CFV/CFVi (Campylobacter fetus subsp. venerealis and its biovar variant). Furthermore, we demonstrated the application of mraY SNPs for detecting C. fetus subspecies using a quantitative PCR assay.

DISCUSSION: Our results highlighted the high genetic stability of C. fetus subspecies. Nevertheless, Campylobacter fetus subsp. venerealis and its biovar variants encoded common SNPs in genes related to glycine intolerance, which differentiates them from C. fetus subsp. fetus. This discovery highlights the potential of employing a multiple-SNP assay for the precise differentiation of C. fetus subspecies.}, } @article {pmid39325737, year = {2024}, author = {Guo, M and Bi, G and Wang, H and Ren, H and Chen, J and Lian, Q and Wang, X and Fang, W and Zhang, J and Dong, Z and Pang, Y and Zhang, Q and Huang, S and Yan, J and Zhao, X}, title = {Genomes of autotetraploid wild and cultivated Ziziphus mauritiana reveal polyploid evolution and crop domestication.}, journal = {Plant physiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/plphys/kiae512}, pmid = {39325737}, issn = {1532-2548}, abstract = {Indian jujube (Ziziphus mauritiana) holds a prominent position in the global fruit and pharmaceutical markets. Here, we report the assemblies of haplotype-resolved, telomere-to-telomere genomes of autotetraploid wild and cultivated Indian jujube plants using a two-stage assembly strategy. The generation of these genomes permitted in-depth investigations into the divergence and evolutionary history of this important fruit crop. Using a graph-based pan-genome constructed from eight monoploid genomes, we identified structural variation (SV)-FST hotspots and SV hotspots. Gap-free genomes provide a means to obtain a global view of centromere structures. We identified presence-absence variation-related genes in four monoploid genomes (cI, cIII, wI, and wIII) and resequencing populations. We also present the population structure and domestication trajectory of the Indian jujube based on the resequencing of 73 wild and cultivated accessions. 
Metabolomic and transcriptomic analyses of mature fruits of wild and cultivated accessions unveiled the genetic basis underlying loss of fruit astringency during domestication of Indian jujube. This study reveals mechanisms underlying the divergence, evolution, and domestication of the autotetraploid Indian jujube and provides rich and reliable genetic resources for future research.}, } @article {pmid39322283, year = {2024}, author = {Narechania, A and Bobo, D and Deitz, K and DeSalle, R and Planet, P and Mathema, B}, title = {Rapid SARS-COV2 surveillance using clinical, pooled, or wastewater sequence as a sensor for population change.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278594.123}, pmid = {39322283}, issn = {1549-5469}, abstract = {The COVID-19 pandemic has highlighted the critical role of genomic surveillance for guiding policy and control. Timeliness is key, but sequence alignment and phylogeny slows most surveillance techniques. Millions of SARS-CoV-2 genomes have been assembled. Phylogenetic methods are ill equipped to handle this sheer scale. We introduce a pangenomic measure that examines the information diversity of a k-mer library drawn from a country's complete set of clinical, pooled, or wastewater sequence. Quantifying diversity is central to ecology. Hill numbers, or the effective number of species in a sample, provide a simple metric for comparing species diversity across environments. The more diverse the sample, the higher the Hill number. We adopt this ecological approach and consider each k-mer an individual and each genome a transect in the pangenome of the species. Structured in this way, Hill numbers summarize the temporal trajectory of pandemic variants, collapsing each day's assemblies into genome equivalents. For pooled or wastewater sequence, we instead compare days using survey sequence divorced from individual infections. 
Across data from the UK, USA, and South Africa, we trace the ascendance of new variants of concern as they emerge in local populations well before these variants are named and added to phylogenetic databases. Using data from San Diego wastewater, we monitor these same population changes from raw, unassembled sequence. This history of emerging variants senses all available data as it is sequenced, intimating variant sweeps to dominance or declines to extinction at the leading edge of the COVID19 pandemic.}, } @article {pmid39322278, year = {2024}, author = {Fornezza, S and Delvecchio, VS and Harvey, WT and Dishuck, PC and Eichler, EE and Giannuzzi, G}, title = {AGAP duplicons associate with structural diversity at Chromosome 10q11.22.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.279454.124}, pmid = {39322278}, issn = {1549-5469}, abstract = {The 10q11.22 chromosomal region is a duplication-rich interval of the human genome and one of the last to be fully assembled. It carries copy-number variable genes associated with intellectual disability, bipolar disorder, and obesity. In this study, we characterized the structural diversity at this locus by analyzing 64 haploid assemblies produced by the Human Pangenome Reference Consortium. We identified eleven alternative haplotypes that differ in the copy number and/or orientation of large genomic segments, ranging from hundreds of kilobase pairs (kbp) to over one megabase pair (Mbp). We uncovered a 2.4 Mbp size difference between the shortest and longest haplotypes. Breakpoint analysis revealed that genomic instability results from nonallelic homologous recombination between segmental duplication (SD) pairs with varying similarity (94.4-99.6%). Nonetheless, these pairs generally recombine at positions where their identity is higher (>99.6%). Recurrent inversions occur with varying breakpoints within the same inverted SD pair. 
Inversion polymorphisms shuffle the entire SD arrangement, creating new predispositions to copy-number variations. The SD architecture is associated with a catarrhine-specific subgroup of the AGAP gene family, which likely triggered the accumulation of SDs at this locus over the past 25 million years of human evolution. Our results reveal extensive structural diversity and genomic instability at the 10q11.22 locus and expand the general understanding of the mutational mechanisms behind SD-mediated rearrangements.}, } @article {pmid39320535, year = {2024}, author = {Chen, L and Zhang, L and Li, Y and Qiao, L and Kumar, S}, title = {Screening of promising molecules against potential drug targets in Yersinia pestis by integrative pan and subtractive genomics, docking and simulation approach.}, journal = {Archives of microbiology}, volume = {206}, number = {10}, pages = {415}, pmid = {39320535}, issn = {1432-072X}, support = {xjr2020020//the Funding for school-level research projects of Yancheng Institute of Technology/ ; }, mesh = {*Yersinia pestis/drug effects/genetics/metabolism ; *Molecular Docking Simulation ; *Anti-Bacterial Agents/pharmacology/chemistry ; *Bacterial Proteins/genetics/metabolism/chemistry ; *Plague/drug therapy/microbiology ; *Genomics ; Humans ; Molecular Dynamics Simulation ; }, abstract = {This study focuses on Yersinia pestis, the bacterium responsible for plague, which posed a severe threat to public health in history. Despite the availability of antibiotics treatment, the emergence of antibiotic resistance in this pathogen has increased challenges of controlling the infections and plague outbreaks. The development of new drug targets and therapies is urgently needed. This research aims to identify novel protein targets from 28 Y. pestis strains by the integrative pan-genomic and subtractive genomics approach. 
Additionally, it seeks to screen out potential safe and effective alternative therapies against these targets via high-throughput virtual screening. Targets should lack homology to human, gut microbiota, and known human 'anti-targets', while should exhibit essentiality for pathogen's survival and virulence, druggability, antibiotic resistance, and broad spectrum across multiple pathogenic bacteria. We identified two promising targets: the aminotransferase class I/class II domain-containing protein and 3-oxoacyl-[acyl-carrier-protein] synthase 2. These proteins were modeled using AlphaFold2, validated through several structural analyses, and were subjected to molecular docking and ADMET analysis. Molecular dynamics simulations determined the stability of the ligand-target complexes, providing potential therapeutic options against Y. pestis.}, } @article {pmid39318438, year = {2024}, author = {Cunha, F and Zhai, Y and Casaro, S and Jones, KL and Hernandez, M and Bisinotto, RS and Kariyawasam, S and Brown, MB and Phillips, A and Jeong, KC and Galvão, KN}, title = {Pangenomic and biochemical analyses of Helcococcus ovis reveal widespread tetracycline resistance and a novel bacterial species, Helcococcus bovis.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1456569}, pmid = {39318438}, issn = {1664-302X}, abstract = {Helcococcus ovis (H. ovis) is an opportunistic bacterial pathogen of a wide range of animal hosts including domestic ruminants, swine, avians, and humans. In this study, we sequenced the genomes of 35 Helcococcus sp. clinical isolates from the uterus of dairy cows and explored their antimicrobial resistance and biochemical phenotypes in vitro. Phylogenetic and average nucleotide identity analyses classified four Helcococcus isolates within a cryptic clade representing an undescribed species, for which we propose the name Helcococcus bovis sp. nov. 
By establishing this new species clade, we also resolve the longstanding question of the classification of the Tongji strain responsible for a confirmed human conjunctival infection. This strain did not neatly fit into H. ovis and is instead a member of H. bovis. We applied whole genome comparative analyses to explore the pangenome, resistome, virulome, and taxonomic diversity of the remaining 31 H. ovis isolates. An overwhelming 97% of H. ovis strains (30 out of 31) harbor mobile tetracycline resistance genes and displayed significantly increased minimum inhibitory concentrations of tetracyclines in vitro. The high prevalence of mobile tetracycline resistance genes makes H. ovis a significant antimicrobial resistance gene reservoir in our food chain. Finally, the phylogenetic distribution of co-occurring high-virulence determinant genes of H. ovis across unlinked and distant loci highlights an instance of convergent gene loss in the species. In summary, this study showed that mobile genetic element-mediated tetracycline resistance is widespread in H. ovis, and that there is evidence of co-occurring virulence factors across clades suggesting convergent gene loss in the species. Finally, we introduced a novel Helcococcus species closely related to H. ovis, called H. bovis sp. nov., which has been reported to cause infection in humans.}, } @article {pmid39315152, year = {2024}, author = {Zheng, B and Xu, J and Zhang, Y and Qin, J and Yuan, D and Fan, T and Wu, W and Chen, Y and Jiang, Y}, title = {MBCN: A novel reference database for Effcient Metagenomic analysis of human gut microbiome.}, journal = {Heliyon}, volume = {10}, number = {18}, pages = {e37422}, pmid = {39315152}, issn = {2405-8440}, abstract = {Metagenomic shotgun sequencing data can identify microbes and their proportions. But metagenomic shotgun data profiling results obtained from multiple projects using different reference databases are difficult to compare and apply meta-analysis. 
Our work aims to create a novel collection of human gut prokaryotic genomes, named Microbiome Collection Navigator (MBCN). 2379 human gut metagenomic samples are screened, and 16,785 metagenome-assembled genomes (MAGs) are assembled using a standardized pipeline. In addition, MAGs are combined with the representative genomes from public prokaryotic genomes collections to cluster, and pan-genomes for each cluster's genomes are constructed to build Kraken2 and Bracken databases. The databases built by MBCN are more comprehensive and accurate for profiling metagenomic reads comparing with other collections on simulated reads and virtual bio-projects. We profile 1082 human gut metagenomic samples with MBCN database and organize profiles and metadata on the web program. Meanwhile, using MBCN as a reference database, we also develop a unified, standardized, and systematic metagenomic analysis pipeline and platform, named MicrobiotaCN (http://www.microbiota.cn) and common statistical and visualization tools for microbiome research are integrated into the web program. Taken together, MBCN and MicrobiotaCN can be a valuable resource and a powerful tool that allows researchers to perform metagenomic analysis by a unified pipeline efficiently.}, } @article {pmid39308021, year = {2024}, author = {Liu, JN and Yan, L and Chai, Z and Liang, Q and Dong, Y and Wang, C and Li, X and Li, C and Mu, Y and Gong, A and Yang, J and Li, J and Yang, KQ and Wu, D and Fang, H}, title = {Pan-genome analyses of eleven Fraxinus species provide insights into salt adaptation in ash trees.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101137}, doi = {10.1016/j.xplc.2024.101137}, pmid = {39308021}, issn = {2590-3462}, abstract = {Ash trees (Fraxinus) exhibit rich genetic diversity and wide adaptation to various ecological environments, several of which are highly salt-tolerant. 
Dissecting the genomic basis underlying ash tree salt adaptation is vital for its resistance breeding. Here, we presented eleven high-quality chromosome-level genome assemblies for Fraxinus species, revealing two unequal sub-genome compositions and two more recent whole-genome triplication events in evolutionary history. A Fraxinus structural variation-based pan-genome was constructed and revealed that presence-absence variations (PAVs) of transmembrane transport genes likely contribute to Fraxinus salt adaptation. Through whole-genome resequencing of an inter-species cross F1-population of F. velutina 'Lula 3' (salt-tolerant) × F. pennsylvanica 'Lula 5' (salt-sensitive), we performed a salt tolerance PAV-based quantitative trait loci (QTL) mapping and pinpointed two PAV-QTLs and candidate genes associated with Fraxinus salt tolerance. Mechanistically, FvbHLH85 enhanced salt tolerance by mediating reactive oxygen species and Na[+]/K[+] homeostasis, while FvSWEET5 did so by mediating osmotic homeostasis. Collectively, these findings provide valuable genomic resources for Fraxinus salt resistance breeding and the research community.}, } @article {pmid39297879, year = {2024}, author = {Sarwal, V and Lee, S and Yang, J and Sankararaman, S and Chaisson, M and Eskin, E and Mangul, S}, title = {VISTA: an integrated framework for structural variant discovery.}, journal = {Briefings in bioinformatics}, volume = {25}, number = {5}, pages = {}, doi = {10.1093/bib/bbae462}, pmid = {39297879}, issn = {1477-4054}, mesh = {Humans ; *Genomic Structural Variation ; *Genome, Human ; *Software ; Whole Genome Sequencing/methods ; Algorithms ; Genomics/methods ; Computational Biology/methods ; Genetic Variation ; }, abstract = {Structural variation (SV) refers to insertions, deletions, inversions, and duplications in human genomes. SVs are present in approximately 1.5% of the human genome. 
Still, this small subset of genetic variation has been implicated in the pathogenesis of psoriasis, Crohn's disease and other autoimmune disorders, autism spectrum and other neurodevelopmental disorders, and schizophrenia. Since identifying structural variants is an important problem in genetics, several specialized computational techniques have been developed to detect structural variants directly from sequencing data. With advances in whole-genome sequencing (WGS) technologies, a plethora of SV detection methods have been developed. However, dissecting SVs from WGS data remains a challenge, with the majority of SV detection methods prone to a high false-positive rate, and no existing method able to precisely detect a full range of SVs present in a sample. Previous studies have shown that none of the existing SV callers can maintain high accuracy across various SV lengths and genomic coverages. Here, we report an integrated structural variant calling framework, Variant Identification and Structural Variant Analysis (VISTA), that leverages the results of individual callers using a novel and robust filtering and merging algorithm. In contrast to existing consensus-based tools which ignore the length and coverage, VISTA overcomes this limitation by executing various combinations of top-performing callers based on variant length and genomic coverage to generate SV events with high accuracy. We evaluated the performance of VISTA on comprehensive gold-standard datasets across varying organisms and coverage. We benchmarked VISTA using the Genome-in-a-Bottle gold standard SV set, haplotype-resolved de novo assemblies from the Human Pangenome Reference Consortium, along with an in-house polymerase chain reaction (PCR)-validated mouse gold standard set. VISTA maintained the highest F1 score among top consensus-based tools measured using a comprehensive gold standard across both mouse and human genomes. 
VISTA also has an optimized mode, where the calls can be optimized for precision or recall. VISTA-optimized can attain 100% precision and the highest sensitivity among other variant callers. In conclusion, VISTA represents a significant advancement in structural variant calling, offering a robust and accurate framework that outperforms existing consensus-based tools and sets a new standard for SV detection in genomic research.}, } @article {pmid39305906, year = {2024}, author = {Gaye, A and Sene, ARG and Gadji, M and Deme, A and Cisse, A and Ndiaye, R}, title = {Toward building a comprehensive human pan-genome: The SEN-GENOME project.}, journal = {American journal of human genetics}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.ajhg.2024.08.017}, pmid = {39305906}, issn = {1537-6605}, abstract = {The human reference genome (GRCh38), primarily sourced from individuals of European descent, falls short in capturing the vast genetic diversity across global populations. Efforts to diversify the reference genome face challenges in accessibility and representation, exacerbating the scarcity of African genomic data crucial for studying diseases prevalent in these populations. Sherman et al. proposed constructing reference genomes tailored to distinct human sub-populations. Their African Pan-Genome initiative highlighted substantial genetic variation missing from the GRCh38 human reference genome, emphasizing the necessity for population-specific genomes. In response, local initiatives like the Senegalese Genome project (SEN-GENOME) have emerged to document the genomes of historically overlooked populations. SEN-GENOME embodies community-driven decentralized research. With meticulous recruitment criteria and ethical practices, it aims to sequence 1,000 genomes from 31 ethnolinguistic groups, in the fourteen administrative regions of Senegal, fostering local genomic research tailored to the region. 
The key to SEN-GENOME's success is its commitment to local governance of data, capacity building, and integration with broader pan-genome projects in Africa. Despite the complexities of data harmonization and sharing, our collaborative efforts are aligned with common goals, ensuring steady progress toward a comprehensive human pan-genome. We invite and welcome collaboration with other research entities to achieve this shared vision. In summary, local initiatives such as SEN-GENOME are pivotal in bridging genomic disparities, offering pathways to equitable and inclusive genomic research. Collaborative endeavors guided by a collective vision for human health will propel us toward a more encompassing understanding of the human genome and better health through genomic medicine.}, } @article {pmid39304820, year = {2024}, author = {Silva, MH and Batista, LL and Malta, SM and Santos, ACC and Mendes-Silva, AP and Bonetti, AM and Ueira-Vieira, C and Dos Santos, AR}, title = {Unveiling the Brazilian kefir microbiome: discovery of a novel Lactobacillus kefiranofaciens (LkefirU) genome and in silico prospection of bioactive peptides with potential anti-Alzheimer properties.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {884}, pmid = {39304820}, issn = {1471-2164}, mesh = {*Kefir/microbiology ; *Alzheimer Disease ; *Lactobacillus/genetics ; *Genome, Bacterial ; *Microbiota ; Brazil ; *Peptides/chemistry/pharmacology ; Humans ; Molecular Docking Simulation ; Amyloid beta-Peptides/metabolism/genetics ; Amyloid Precursor Protein Secretases/metabolism ; Aspartic Acid Endopeptidases/genetics/metabolism ; Metagenomics/methods ; }, abstract = {BACKGROUND: Kefir is a complex microbial community that plays a critical role in the fermentation and production of bioactive peptides, and has health-improving properties. 
The composition of kefir can vary by geographic localization and weather, and this paper focuses on a Brazilian sample and continues previous work that has successful anti-Alzheimer properties. In this study, we employed shotgun metagenomics and peptidomics approaches to characterize Brazilian kefir further.

RESULTS: We successfully assembled the novel genome of Lactobacillus kefiranofaciens (LkefirU) and conducted a comprehensive pangenome analysis to compare it with other strains. Furthermore, we performed a peptidome analysis, revealing the presence of bioactive peptides encrypted by L. kefiranofaciens in the Brazilian kefir sample, and utilized in silico prospecting and molecular docking techniques to identify potential anti-Alzheimer peptides, targeting β-amyloid (fibril and plaque), BACE, and acetylcholinesterase. Through this analysis, we identified two peptides that show promise as compounds with anti-Alzheimer properties.

CONCLUSIONS: These findings not only provide insights into the genome of L. kefiranofaciens but also serve as a promising prototype for the development of novel anti-Alzheimer compounds derived from Brazilian kefir.}, } @article {pmid39304803, year = {2024}, author = {Martineau, M and Ambroset, C and Lefebvre, S and Kokabi, É and Léon, A and Tardy, F}, title = {Unravelling the main genomic features of Mycoplasma equirhinis.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {886}, pmid = {39304803}, issn = {1471-2164}, support = {CS-2020-2023-022-MYCOPAB//Institut Français du Cheval et de l'Equitation (IFCE)/ ; N15-2020//Fonds Eperon/ ; GIS20-SEP-01//GIS (Groupement d'Intérêt Scientifique) CENTAURE/ ; }, mesh = {*Mycoplasma/genetics/pathogenicity ; *Genome, Bacterial ; *Phylogeny ; *Genomics/methods ; Animals ; Horses ; Virulence/genetics ; Mycoplasma Infections/veterinary/microbiology ; }, abstract = {BACKGROUND: Mycoplasma spp. are wall-less bacteria with small genomes (usually 0.5-1.5 Mb). Many Mycoplasma (M.) species are known to colonize the respiratory tract of both humans and livestock animals, where they act as primary pathogens or opportunists. M. equirhinis was described for the first time in 1975 in horses but has been poorly studied since, despite regular reports of around 14% prevalence in equine respiratory disorders. We recently showed that M. equirhinis is not a primary pathogen but could play a role in co-infections of the respiratory tract. This study was set up to propose the first genomic characterization to better our understanding of the M. equirhinis species.

RESULTS: Four circularized genomes, two of which were generated here, were compared in terms of synteny, gene content, and specific features associated with virulence or genome plasticity. An additional 20 scaffold-level genomes were used to analyse intra-species diversity through a pangenome phylogenetic approach. The M. equirhinis species showed consistent genomic homogeneity, pointing to potential clonality of isolates despite their varied geographical origins (UK, Japan and various places in France). Three different classes of mobile genetic elements have been detected: insertion sequences related to the IS1634 family, a putative prophage related to M. arthritidis and integrative conjugative elements related to M. arginini. The core genome harbours the typical putative virulence-associated genes of mycoplasmas mainly involved in cytoadherence and immune escape.

CONCLUSION: M. equirhinis is a highly syntenic, homogeneous species with a limited repertoire of mobile genetic elements and putative virulence genes.}, } @article {pmid39304579, year = {2024}, author = {Chandra, T and Jaiswal, S and Tomar, RS and Iquebal, MA and Kumar, D}, title = {Realizing visionary goals for the International Year of Millet (IYoM): accelerating interventions through advances in molecular breeding and multiomics resources.}, journal = {Planta}, volume = {260}, number = {4}, pages = {103}, pmid = {39304579}, issn = {1432-2048}, mesh = {*Millets/genetics ; *Plant Breeding/methods ; *Climate Change ; *Crops, Agricultural/genetics ; *Genomics ; Biodiversity ; Food Security ; Agriculture/methods ; Multiomics ; }, abstract = {Leveraging advanced breeding and multi-omics resources is vital to position millet as an essential "nutricereal resource," aligning with IYoM goals, alleviating strain on global cereal production, boosting resilience to climate change, and advancing sustainable crop improvement and biodiversity. The global challenges of food security, nutrition, climate change, and agrarian sustainability demand the adoption of climate-resilient, nutrient-rich crops to support a growing population amidst shifting environmental conditions. Millets, also referred to as "Shree Anna," emerge as a promising solution to address these issues by bolstering food production, improving nutrient security, and fostering biodiversity conservation. Their resilience to harsh environments, nutritional density, cultural significance, and potential to enhance dietary quality index made them valuable assets in global agriculture. Recognizing their pivotal role, the United Nations designated 2023 as the "International Year of Millets (IYoM 2023)," emphasizing their contribution to climate-resilient agriculture and nutritional enhancement. 
Scientific progress has invigorated efforts to enhance millet production through genetic and genomic interventions, yielding a wealth of advanced molecular breeding technologies and multi-omics resources. These advancements offer opportunities to tackle prevailing challenges in millet, such as anti-nutritional factors, sensory acceptability issues, toxin contamination, and ancillary crop improvements. This review provides a comprehensive overview of molecular breeding and multi-omics resources for nine major millet species, focusing on their potential impact within the framework of IYoM. These resources include whole and pan-genome, elucidating adaptive responses to abiotic stressors, organelle-based studies revealing evolutionary resilience, markers linked to desirable traits for efficient breeding, QTL analysis facilitating trait selection, functional gene discovery for biotechnological interventions, regulatory ncRNAs for trait modulation, web-based platforms for stakeholder communication, tissue culture techniques for genetic modification, and integrated omics approaches enabled by precise application of CRISPR/Cas9 technology. Aligning these resources with the seven thematic areas outlined by IYoM catalyzes transformative changes in millet production and utilization, thereby contributing to global food security, sustainable agriculture, and enhanced nutritional consequences.}, } @article {pmid39298479, year = {2024}, author = {Hellewell, J and Horsfield, ST and von Wachsmann, J and Gurbich, TA and Finn, RD and Iqbal, Z and Roberts, LW and Lees, JA}, title = {CELEBRIMBOR: Core and accessory genes from metagenomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae542}, pmid = {39298479}, issn = {1367-4811}, abstract = {MOTIVATION: Metagenome-Assembled Genomes (MAGs) or Single-cell Amplified Genomes (SAGs) are often incomplete, with sequences missing due to errors in assembly or low coverage. 
This presents a particular challenge for the identification of true gene frequencies within a microbial population, as core genes missing in only a few assemblies will be mischaracterized by current pangenome approaches.

RESULTS: Here, we present CELEBRIMBOR, a Snakemake pangenome analysis pipeline which uses a measure of genome completeness to automatically adjust the frequency threshold at which core genes are identified, enabling accurate core gene identification in MAGs and SAGs.

AVAILABILITY: CELEBRIMBOR is published under open source Apache 2.0 licence at https://github.com/bacpop/CELEBRIMBOR and is available as a Docker container from this repository. Supplementary material is available in the online version of the article.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid39294581, year = {2024}, author = {Vaibarova, V and Kralova, S and Palikova, M and Schwarzerova, J and Nejezchlebova, J and Cejkova, D and Cizek, A}, title = {Genetic and phenotypic diversity of Flavobacterium psychrophilum isolates from Czech salmonid fish farms.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {352}, pmid = {39294581}, issn = {1471-2180}, mesh = {Animals ; *Flavobacterium/genetics/isolation & purification/classification/drug effects ; Czech Republic ; *Fish Diseases/microbiology ; *Flavobacteriaceae Infections/microbiology/veterinary ; *Phylogeny ; *Genetic Variation ; *Multilocus Sequence Typing ; *Oncorhynchus mykiss/microbiology ; Anti-Bacterial Agents/pharmacology ; Serotyping ; Aquaculture ; Phenotype ; Virulence Factors/genetics ; Microbial Sensitivity Tests ; Drug Resistance, Bacterial/genetics ; Genome, Bacterial/genetics ; Trout/microbiology ; }, abstract = {BACKGROUND: The salmonid pathogen Flavobacterium psychrophilum poses a significant economic threat to global aquaculture, yet our understanding of its genetic and phenotypic diversity remains incomplete across much of its geographic range. In this study, we characterise the genetic and phenotypic diversity of 70 isolates collected from rainbow trout (Oncorhynchus mykiss) and brown trout (Salmo trutta m. fario) from fish farms in the Czech Republic between 2012 and 2019 to compare their genomic content with all draft or complete genomes present in the NCBI database (n = 187).

RESULTS: The Czech isolates underwent comprehensive evaluation, including multiplex PCR-based serotyping, genetic analysis, antimicrobial resistance testing, and assessment of selected virulence factors. Multiplex PCR serotyping revealed 43 isolates as Type 1, 23 as Type 2, with sporadic cases of Types 3 and 4. Multi-locus sequence typing unveiled 12 sequence types (ST), including seven newly described ones. Notably, 24 isolates were identified as ST329, a novel sequence type, while 22 were classified as the globally-distributed ST2. Phylogenetic analysis demonstrated clonal distribution of ST329 in the Czech Republic, with these isolates lacking a phage sequence in their genomes. Antimicrobial susceptibility testing revealed a high proportion of isolates classified as non-wild type with reduced susceptibility to oxolinic acid, oxytetracycline, flumequine, and enrofloxacin, while most isolates were classified as wild type for florfenicol, sulfamethoxazole-trimethoprim, and erythromycin. However, 31 isolates classified as wild type for florfenicol exhibited minimum inhibitory concentrations at the susceptibility breakpoint.

CONCLUSION: The prevalence of the Czech F. psychrophilum serotypes has evolved over time, likely influenced by the introduction of new isolates through international trade. Thus, it is crucial to monitor F. psychrophilum clones within and across countries using advanced methods such as MLST, serotyping, and genome sequencing. Given the open nature of the pan-genome, further sequencing of strains promises exciting discoveries in F. psychrophilum genomics.}, } @article {pmid39294752, year = {2024}, author = {Góngora, E and Lirette, AO and Freyria, NJ and Greer, CW and Whyte, LG}, title = {Metagenomic survey reveals hydrocarbon biodegradation potential of Canadian high Arctic beaches.}, journal = {Environmental microbiome}, volume = {19}, number = {1}, pages = {72}, pmid = {39294752}, issn = {2524-6372}, support = {Multi-Partner Research Initiative//Fisheries and Oceans Canada/ ; 273122//Fonds de recherche du Québec - Nature et technologies/ ; Polar Continental Shelf Project//Natural Resources Canada/ ; Northern Scientific Training Program//Polar Knowledge Canada/ ; }, abstract = {BACKGROUND: Decreasing sea ice coverage across the Arctic Ocean due to climate change is expected to increase shipping activity through previously inaccessible shipping routes, including the Northwest Passage (NWP). Changing weather conditions typically encountered in the Arctic will still pose a risk for ships which could lead to an accident and the uncontrolled release of hydrocarbons onto NWP shorelines. We performed a metagenomic survey to characterize the microbial communities of various NWP shorelines and to determine whether there is a metabolic potential for hydrocarbon degradation in these microbiomes.

RESULTS: We observed taxonomic and functional gene evidence supporting the potential of NWP beach microbes to degrade various types of hydrocarbons. The metagenomic and metagenome-assembled genome (MAG) taxonomy showed that known hydrocarbon-degrading taxa are present in these beaches. Additionally, we detected the presence of biomarker genes of aerobic and anaerobic degradation pathways of alkane and aromatic hydrocarbons along with complete degradation pathways for aerobic alkane degradation. Alkane degradation genes were present in all samples and were also more abundant (33.8 ± 34.5 hits per million genes, HPM) than their aromatic hydrocarbon counterparts (11.7 ± 12.3 HPM). Due to the ubiquity of MAGs from the genus Rhodococcus (23.8% of the MAGs), we compared our MAGs with Rhodococcus genomes from NWP isolates obtained using hydrocarbons as the carbon source to corroborate our results and to develop a pangenome of Arctic Rhodococcus. Our analysis revealed that the biodegradation of alkanes is part of the core pangenome of this genus. We also detected nitrogen and sulfur pathways as additional energy sources and electron donors as well as carbon pathways providing alternative carbon sources. These pathways occur in the absence of hydrocarbons allowing microbes to survive in these nutrient-poor beaches.

CONCLUSIONS: Our metagenomic analyses detected the genetic potential for hydrocarbon biodegradation in these NWP shoreline microbiomes. Alkane metabolism was the most prevalent type of hydrocarbon degradation observed in these tidal beach ecosystems. Our results indicate that bioremediation could be used as a cleanup strategy, but the addition of adequate amounts of N and P fertilizers should be considered to help bacteria overcome the oligotrophic nature of NWP shorelines.}, } @article {pmid39289205, year = {2024}, author = {Bouznada, K and Saker, R and Belaouni, HA and Meklat, A}, title = {Phylogenomic Analysis Supports the Reclassification of Caldicoprobacter faecalis (Winter et al. 1988) Bouanane-Darenfed et al. (2015) as a Later Heterotypic Synonym of Caldicoprobacter oshimai Yokoyama et al. (2010).}, journal = {Current microbiology}, volume = {81}, number = {11}, pages = {363}, pmid = {39289205}, issn = {1432-0991}, mesh = {*Phylogeny ; *Genome, Bacterial ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; Nucleic Acid Hybridization ; }, abstract = {This study employs genome-based methodologies to explore the taxonomic relationship between Caldicoprobacter faecalis DSM 20678[T] and Caldicoprobacter oshimai DSM 21659[T]. The genome-based similarity indices calculations consisting of digital DNA-DNA Hybridization (dDDH), Average Amino Acid Identity (AAI), and Average Nucleotide Identity (ANI) between the genomes of these two type strains yielded percentages of 91.2%, 98.9%, and 99.1%, respectively. These values were above the recommended thresholds of 70% (dDDH) and 95-96% (ANI and AAI) for bacterial species delineation, indicating a shared taxonomic position for C. faecalis and C. oshimai. Furthermore, analysis utilizing the 'Bacterial Pan Genome Analysis' (BPGA) pipeline and constructing a Maximum Likelihood core-genes tree using FastTree2 consistently demonstrated the close relationship between C. faecalis DSM 20678[T] and C. 
oshimai DSM 21659[T], evident from their clustering in the core-genes phylogenomic tree. Based on these comprehensive findings, we propose the reclassification of C. faecalis as a later heterotypic synonym of C. oshimai.}, } @article {pmid39287376, year = {2024}, author = {Bucher-Johannessen, C and Senthakumaran, T and Avershina, E and Birkeland, E and Hoff, G and Bemanian, V and Tunsjø, H and Rounge, TB}, title = {Species-level verification of Phascolarctobacterium association with colorectal cancer.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0073424}, doi = {10.1128/msystems.00734-24}, pmid = {39287376}, issn = {2379-5077}, abstract = {We have previously demonstrated an association between increased abundance of Phascolarctobacterium and colorectal cancer (CRC) and adenomas in two independent Norwegian cohorts. Here we seek to verify our previous findings using new cohorts and methods. In addition, we characterize lifestyle and sex specificity, the functional potential of the Phascolarctobacterium species, and their interaction with other microbial species. We analyze Phascolarctobacterium with 16S rRNA sequencing, shotgun metagenome sequencing, and species-specific qPCR, using 2350 samples from three Norwegian cohorts—CRCAhus, NORCCAP, and CRCbiome—and a large publicly available data set, curatedMetagenomicData. Using metagenome-assembled genomes from the CRCbiome study, we explore the genomic characteristics and functional potential of the Phascolarctobacterium pangenome. Three species of Phascolarctobacterium associated with adenoma/CRC were consistently detected by qPCR and sequencing. Positive associations with adenomas/CRC were verified for Phascolarctobacterium succinatutens and negative associations were shown for Phascolarctobacterium faecium and adenoma in curatedMetagenomicData. Men show a higher prevalence of P. succinatutens across cohorts. Co-occurrence among Phascolarctobacterium species was low (<6%). 
Each of the three species shows distinct microbial composition and forms distinct correlation networks with other bacterial taxa, although Dialister invisus was negatively correlated to all investigated Phascolarctobacterium species. Pangenome analyses showed P. succinatutens to be enriched for genes related to porphyrin metabolism and degradation of complex carbohydrates, whereas glycoside hydrolase enzyme 3 was specific to P. faecium. IMPORTANCE: Until now Phascolarctobacterium has been going under the radar as a CRC-associated genus despite having been noted, but overlooked, as such for over a decade. We found not just one, but two species of Phascolarctobacterium to be associated with CRC—Phascolarctobacterium succinatutens was more abundant in adenoma/CRC, while Phascolarctobacterium faecium was less abundant in adenoma. Each of them represents distinct communities, constituted by specific microbial partners and metabolic capacities—and they rarely occur together in the same patients. We have verified that P. succinatutens is increased in adenoma and CRC and this species should be recognized among the most important CRC-associated bacteria.}, } @article {pmid39283360, year = {2024}, author = {Geethanjali, S and Kadirvel, P and Periyannan, S}, title = {Wheat improvement through advances in single nucleotide polymorphism (SNP) detection and genotyping with a special emphasis on rust resistance.}, journal = {TAG. Theoretical and applied genetics. 
Theoretische und angewandte Genetik}, volume = {137}, number = {10}, pages = {224}, pmid = {39283360}, issn = {1432-2242}, mesh = {*Triticum/genetics/microbiology ; *Polymorphism, Single Nucleotide ; *Disease Resistance/genetics ; *Plant Diseases/microbiology/genetics ; *Plant Breeding/methods ; *Basidiomycota/pathogenicity ; Genetic Markers ; Genotyping Techniques/methods ; Genotype ; Genome, Plant ; }, abstract = {Single nucleotide polymorphism (SNP) markers in wheat and their prospects in breeding with special reference to rust resistance. Single nucleotide polymorphism (SNP)-based markers are increasingly gaining momentum for screening and utilizing vital agronomic traits in wheat. To date, more than 260 million SNPs have been detected in modern cultivars and landraces of wheat. This rapid SNP discovery was made possible through the release of near-complete reference and pan-genome assemblies of wheat and its wild relatives, coupled with whole genome sequencing (WGS) of thousands of wheat accessions. Further, genotyping customized SNP sites were facilitated by a series of arrays (9 to 820Ks), a cost effective substitute WGS. Lately, germplasm-specific SNP arrays have been introduced to characterize novel traits and detect closely linked SNPs for marker-assisted breeding. Subsequently, the kompetitive allele-specific PCR (KASP) assay was introduced for rapid and large-scale screening of specific SNP markers. Moreover, with the advances and reduction in sequencing costs, ample opportunities arise for generating SNPs artificially through mutations and in combination with next-generation sequencing and comparative genomic analyses. In this review, we provide historical developments and prospects of SNP markers in wheat breeding with special reference to rust resistance where over 50 genetic loci have been characterized through SNP markers. 
Rust resistance is one of the most essential traits for wheat breeding as new strains of the Puccinia fungus, responsible for rust diseases, evolve frequently and globally.}, } @article {pmid39282334, year = {2024}, author = {Olivos-Caicedo, KY and Fernandez, F and Daniel, SL and Anantharaman, K and Ridlon, JM and Alves, JMP}, title = {Pangenome analysis of Clostridium scindens: a collection of diverse bile acid and steroid metabolizing commensal gut bacterial strains.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.09.06.610859}, pmid = {39282334}, issn = {2692-8205}, abstract = {Clostridium scindens is a commensal gut bacterium capable of forming the secondary bile acids deoxycholic acid and lithocholic acid from the primary bile acids cholic acid and chenodeoxycholic acid, respectively, as well as converting glucocorticoids to androgens. Historically, only two strains, C. scindens ATCC 35704 and C. scindens VPI 12708, have been characterized in vitro and in vivo to any significant extent. The formation of secondary bile acids is important in maintaining normal gastrointestinal function, in regulating the structure of the gut microbiome, in the etiology of diseases such as cancers of the GI tract, and in the prevention of Clostridium difficile infection. We therefore wanted to determine the pangenome of 34 cultured strains of C. scindens and a set of 200 metagenome-assembled genomes (MAGs) to understand the variability among strains. The results indicate that the 34 strains of C. scindens have an open pangenome with 12,720 orthologous gene groups, and a core genome with 1,630 gene families, in addition to 7,051 and 4,039 gene families in the accessory and unique (i.e., strain-exclusive) genomes, respectively. 
The core genome contains 39% of the proteins with predicted metabolic function, and, in the unique genome, the function of storage and processing of information prevails, with 34% of the proteins being in that category. The pangenome profile including the MAGs also proved to be open. The presence of bile acid inducible (bai) and steroid-17,20-desmolase (des) genes was identified among groups of strains. The analysis reveals that C. scindens strains are distributed into two clades, indicating the possible onset of C. scindens separation into two species, confirmed by gene content, phylogenomic, and average nucleotide identity (ANI) analyses. This study provides insight into the structure and function of the C. scindens pangenome, offering a genetic foundation of significance for many aspects of research on the intestinal microbiota and bile acid metabolism.}, } @article {pmid39282288, year = {2024}, author = {Littlefield, C and Lazaro-Guevara, JM and Stucki, D and Lansford, M and Pezzolesi, MH and Taylor, EJ and Wolfgramm, EC and Taloa, J and Lao, K and Dumaguit, CDC and Ridge, PG and Tavana, JP and Holland, WL and Raphael, KL and Pezzolesi, MG}, title = {A Draft Pacific Ancestry Pangenome Reference.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.08.07.606392}, pmid = {39282288}, issn = {2692-8205}, abstract = {Individuals of Pacific ancestry suffer some of the highest rates of health disparities yet remain vastly underrepresented in genomic research, including currently available linear and pangenome references. To begin addressing this, we developed the first Pacific ancestry pangenome reference using 23 individuals with diverse Pacific ancestry. 
We assembled 46 haploid genomes from these 23 individuals, resulting in highly accurate and contiguous genome assemblies with an average quality value of 55.0 and an average N50 of 40.7 Mb, marking the first de novo assembly of highly accurate Pacific ancestry genomes. We combined these assemblies to create a pangenome reference, which added 30.6 Mb of novel sequence missing from the Human Pangenome Reference Consortium (HPRC) reference. Mapping short reads to this pangenome reduced variant call errors and yielded more true-positive variants compared to the HPRC and T2T-CHM13 references. This Pacific ancestry pangenome reference serves as a resource to enhance genetic analyses for this underserved population.}, } @article {pmid39279860, year = {2024}, author = {Feng, Y and Weers, T and Peters, RJ}, title = {Double-barreled defense: dual ent-miltiradiene synthases in most rice cultivars.}, journal = {aBIOTECH}, volume = {5}, number = {3}, pages = {375--380}, doi = {10.1007/s42994-024-00167-3}, pmid = {39279860}, issn = {2662-1738}, abstract = {UNLABELLED: Rice (Oryza sativa) produces numerous diterpenoid phytoalexins that are important in defense against pathogens. Surprisingly, despite extensive previous investigations, a major group of such phytoalexins, the abietoryzins, were only recently reported. These aromatic abietanes are presumably derived from ent-miltiradiene, but such biosynthetic capacity has not yet been reported in O. sativa. While wild rice has been reported to contain such an enzyme, specifically ent-kaurene synthase-like 10 (KSL10), the only characterized ortholog from O. sativa (OsKSL10), specifically from the well-studied cultivar (cv.) Nipponbare, instead has been shown to make ent-sandaracopimaradiene, precursor to the oryzalexins. Notably, in many other cultivars, OsKSL10 is accompanied by a tandem duplicate, termed here OsKSL14. Biochemical characterization of OsKSL14 from cv. 
Kitaake demonstrates that this produces the expected abietoryzin precursor ent-miltiradiene. Strikingly, phylogenetic analysis of OsKSL10 across the rice pan-genome reveals that from cv. Nipponbare is an outlier, whereas the alleles from most other cultivars group with those from wild rice, suggesting that these also might produce ent-miltiradiene. Indeed, OsKSL10 from cv. Kitaake exhibits such activity as well, consistent with its production of abietoryzins but not oryzalexins. Similarly consistent with these results is the lack of abietoryzin production by cv. Nipponbare. Although their equivalent product outcome might suggest redundancy, OsKSL10 and OsKSL14 were observed to exhibit distinct expression patterns, indicating such differences may underlie retention of these duplicated genes. Regardless, the results reported here clarify abietoryzin biosynthesis and provide insight into the evolution of rice diterpenoid phytoalexins.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s42994-024-00167-3.}, } @article {pmid39278956, year = {2024}, author = {Hou, Y and Gan, J and Fan, Z and Sun, L and Garg, V and Wang, Y and Li, S and Bao, P and Cao, B and Varshney, RK and Zhao, H}, title = {Haplotype-based pangenomes reveal genetic variations and climate adaptations in moso bamboo populations.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {8085}, pmid = {39278956}, issn = {2041-1723}, support = {32271975//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, mesh = {*Haplotypes ; *Poaceae/genetics ; *Climate Change ; *Genetic Variation ; *Genome, Plant ; China ; Adaptation, Physiological/genetics ; Acclimatization/genetics ; }, abstract = {Moso bamboo (Phyllostachys edulis), an ecologically and economically important forest species in East Asia, plays vital roles in carbon sequestration and climate change mitigation. However, intensifying climate change threatens moso bamboo survival. Here we generate high-quality haplotype-based pangenome assemblies for 16 representative moso bamboo accessions and integrated these assemblies with 427 previously resequenced accessions. Characterization of the haplotype-based pangenome reveals extensive genetic variation, predominantly between haplotypes rather than within accessions. Many genes with allele-specific expression patterns are implicated in climate responses. Integrating spatiotemporal climate data reveals more than 1050 variations associated with pivotal climate factors, including temperature and precipitation. Climate-associated variations enable the prediction of increased genetic risk across the northern and western regions of China under future emissions scenarios, underscoring the threats posed by rising temperatures. 
Our integrated haplotype-based pangenome elucidates moso bamboo's local climate adaptation mechanisms and provides critical genomic resources for addressing intensifying climate pressures on this essential bamboo. More broadly, this study demonstrates the power of long-read sequencing in dissecting adaptive traits in climate-sensitive species, advancing evolutionary knowledge to support conservation.}, } @article {pmid39273946, year = {2024}, author = {Wu, Y and Wang, F and Lyu, K and Liu, R}, title = {Comparative Analysis of Transposable Elements in the Genomes of Citrus and Citrus-Related Genera.}, journal = {Plants (Basel, Switzerland)}, volume = {13}, number = {17}, pages = {}, doi = {10.3390/plants13172462}, pmid = {39273946}, issn = {2223-7747}, support = {No grant number//Fujian Agriculture and Forestry University/ ; }, abstract = {Transposable elements (TEs) significantly contribute to the evolution and diversity of plant genomes. In this study, we explored the roles of TEs in the genomes of Citrus and Citrus-related genera by constructing a pan-genome TE library from 20 published genomes of Citrus and Citrus-related accessions. Our results revealed an increase in TE content and the number of TE types compared to the original annotations, as well as a decrease in the content of unclassified TEs. The average length of TEs per assembly was approximately 194.23 Mb, representing 41.76% (Murraya paniculata) to 64.76% (Citrus gilletiana) of the genomes, with a mean value of 56.95%. A significant positive correlation was found between genome size and both the number of TE types and TE content. Consistent with the difference in mean whole-genome size (39.83 Mb) between Citrus and Citrus-related genera, Citrus genomes contained an average of 34.36 Mb more TE sequences than Citrus-related genomes. 
Analysis of the estimated insertion time and half-life of long terminal repeat retrotransposons (LTR-RTs) suggested that TE removal was not the primary factor contributing to the differences among genomes. These findings collectively indicate that TEs are the primary determinants of genome size and play a major role in shaping genome structures. Principal coordinate analysis (PCoA) of Gene Ontology (GO) and Kyoto Encyclopedia of Genes and Genomes (KEGG) identifiers revealed that the fragmented TEs were predominantly derived from ancestral genomes, while intact TEs were crucial in the recent evolutionary diversification of Citrus. Moreover, the presence or absence of intact TEs near the AdhE superfamily was closely associated with the bitterness trait in the Citrus species. Overall, this study enhances TE annotation in Citrus and Citrus-related genomes and provides valuable data for future genetic breeding and agronomic trait research in Citrus.}, } @article {pmid39273624, year = {2024}, author = {Song, Y and Han, S and Wang, M and Ni, X and Huang, X and Zhang, Y}, title = {Pangenome Identification and Analysis of Terpene Synthase Gene Family Members in Gossypium.}, journal = {International journal of molecular sciences}, volume = {25}, number = {17}, pages = {}, doi = {10.3390/ijms25179677}, pmid = {39273624}, issn = {1422-0067}, support = {32272638 and 31701800//National Natural Science Foundation of China/ ; }, mesh = {*Gossypium/genetics/enzymology ; *Alkyl and Aryl Transferases/genetics/metabolism ; *Multigene Family ; *Genome, Plant ; *Phylogeny ; Plant Proteins/genetics/metabolism ; Terpenes/metabolism ; Gene Expression Regulation, Plant ; }, abstract = {Terpene synthases (TPSs), key gatekeepers in the biosynthesis of herbivore-induced terpenes, are pivotal in the diversity of terpene chemotypes across and within plant species. 
Here, we constructed a gene-based pangenome of the Gossypium genus by integrating the genomes of 17 diploid and 10 tetraploid species. Within this pangenome, 208 TPS syntelog groups (SGs) were identified, comprising 2 core SGs (TPS5 and TPS42) present in all 27 analyzed genomes, 6 softcore SGs (TPS11, TPS12, TPS13, TPS35, TPS37, and TPS47) found in 25 to 26 genomes, 131 dispensable SGs identified in 2 to 24 genomes, and 69 private SGs exclusive to a single genome. The mutational load analysis of these identified TPS genes across 216 cotton accessions revealed a great number of splicing variants and complex splicing patterns. The nonsynonymous/synonymous Ka/Ks value for all 52 analyzed TPS SGs was less than one, indicating that these genes were subject to purifying selection. Of 208 TPS SGs encompassing 1795 genes, 362 genes derived from 102 SGs were identified as atypical and truncated. The structural analysis of TPS genes revealed that gene truncation is a major mechanism contributing to the formation of atypical genes. An integrated analysis of three RNA-seq datasets from cotton plants subjected to herbivore infestation highlighted nine upregulated TPSs, which included six previously characterized TPSs in G. hirsutum (AD1_TPS10, AD1_TPS12, AD1_TPS40, AD1_TPS42, AD1_TPS89, and AD1_TPS104), two private TPSs (AD1_TPS100 and AD2_TPS125), and one atypical TPS (AD2_TPS41). Also, a TPS-associated coexpression module of eight genes involved in the terpenoid biosynthesis pathway was identified in the transcriptomic data of herbivore-infested G. hirsutum. 
These findings will help us understand the contributions of TPS family members to interspecific terpene chemotypes within Gossypium and offer valuable resources for breeding insect-resistant cotton cultivars.}, } @article {pmid39268542, year = {2024}, author = {Olson, MA and Cullimore, C and Hutchison, WD and Grimsrud, A and Nobrega, D and De Buck, J and Barkema, HW and Wilson, E and Pickett, BE and Erickson, DL}, title = {Genes associated with fitness and disease severity in the pan-genome of mastitis-associated Escherichia coli.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1452007}, pmid = {39268542}, issn = {1664-302X}, abstract = {INTRODUCTION: Bovine mastitis caused by Escherichia coli compromises animal health and inflicts substantial product losses in dairy farming. It may manifest as subclinical through severe acute disease and can be transient or persistent in nature. Little is known about bacterial factors that impact clinical outcomes or allow some strains to outcompete others in the mammary gland (MG) environment. Mastitis-associated E. coli (MAEC) may have distinctive characteristics which may contribute to the varied nature of the disease. Given their high levels of intraspecies genetic variability, virulence factors of commonly used MAEC model strains may not be relevant to all members of this group.

METHODS: In this study, we sequenced the genomes of 96 MAEC strains isolated from cattle with clinical mastitis (CM). We utilized clinical severity data to perform genome-wide association studies to identify accessory genes associated with strains isolated from mild or severe CM, or with high or low competitive fitness during in vivo competition assays. Genes associated with mastitis pathogens or commensal strains isolated from bovine sources were also identified.

RESULTS: A type-2 secretion system (T2SS) and a chitinase (ChiA) exported by this system were strongly associated with pathogenic isolates compared with commensal strains. Deletion of chiA from MAEC isolates decreased their adherence to cultured bovine mammary epithelial cells.

DISCUSSION: The increased fitness associated with strains possessing this gene may be due to better attachment in the MG. Overall, these results provide a much richer understanding of MAEC and suggest bacterial processes that may underlie the clinical diversity associated with mastitis and their adaptation to this unique environment.}, } @article {pmid39264185, year = {2024}, author = {Magar, S and Kolte, V and Sharma, G and Govindarajan, S}, title = {Exploring pangenomic diversity and CRISPR-Cas evasion potential in jumbo phages: a comparative genomics study.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0420023}, doi = {10.1128/spectrum.04200-23}, pmid = {39264185}, issn = {2165-0497}, abstract = {UNLABELLED: Jumbo phages are characterized by their remarkably large-sized genome and unique life cycles. Jumbo phages belonging to Chimalliviridae family protect the replicating phage DNA from host immune systems like CRISPR-Cas and restriction-modification system through a phage nucleus structure. Several recent studies have provided new insights into jumbo phage infection biology, but the pan-genome diversity of jumbo phages and their relationship with CRISPR-Cas targeting beyond Chimalliviridae are not well understood. In this study, we used pan-genome analysis to identify orthologous gene families shared among 331 jumbo phages with complete genomes. We show that jumbo phages lack a universally conserved set of core genes but identified seven "soft-core genes" conserved in over 50% of these phages. These genes primarily govern DNA-related activities, such as replication, repair, or nucleotide synthesis. Jumbo phages exhibit a wide array of accessory and unique genes, underscoring their genetic diversity. Phylogenetic analyses of the soft-core genes revealed frequent horizontal gene transfer events between jumbo phages, non-jumbo phages, and occasionally even giant eukaryotic viruses, indicating a polyphyletic evolutionary nature. 
We categorized jumbo phages into 11 major viral clusters (VCs) spanning 130 sub-clusters, with the majority being multi-genus jumbo phage clusters. Moreover, through the analysis of hallmark genes related to CRISPR-Cas targeting, we predict that many jumbo phages can evade host immune systems using both known and yet-to-be-identified mechanisms. In summary, our study enhances our understanding of jumbo phages, shedding light on their pan-genome diversity and remarkable genome protection capabilities.

IMPORTANCE: Jumbo phages are large bacterial viruses known for more than 50 years. However, only in recent years, a significant number of complete genome sequences of jumbo phages have become available. In this study, we employed comparative genomic approaches to investigate the genomic diversity and genome protection capabilities of the 331 jumbo phages. Our findings revealed that jumbo phages exhibit high genetic diversity, with only a few genes being relatively conserved across jumbo phages. Interestingly, our data suggest that jumbo phages employ yet-to-be-identified strategies to protect their DNA from the host immune system, such as CRISPR-Cas.}, } @article {pmid39261641, year = {2024}, author = {Sirén, J and Eskandar, P and Ungaro, MT and Hickey, G and Eizenga, JM and Novak, AM and Chang, X and Chang, PC and Kolmogorov, M and Carroll, A and Monlong, J and Paten, B}, title = {Personalized pangenome references.}, journal = {Nature methods}, volume = {}, number = {}, pages = {}, pmid = {39261641}, issn = {1548-7105}, support = {R01HG010485//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG010262//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG011853//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U01HG010961//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; OT2OD033761//U.S. Department of Health & Human Services | National Institutes of Health (NIH)/ ; OT3HL142481//U.S. Department of Health & Human Services | NIH | National Heart, Lung, and Blood Institute (NHLBI)/ ; }, abstract = {Pangenomes reduce reference bias by representing genetic diversity better than a single reference sequence. 
Yet when comparing a sample to a pangenome, variants in the pangenome that are not part of the sample can be misleading, for example, causing false read mappings. These irrelevant variants are generally rarer in terms of allele frequency, and have previously been dealt with by filtering rare variants. However, this blunt heuristic both fails to remove some irrelevant variants and removes many relevant variants. We propose a new approach that imputes a personalized pangenome subgraph by sampling local haplotypes according to k-mer counts in the reads. We implement the approach in the vg toolkit (https://github.com/vgteam/vg) for the Giraffe short-read aligner and compare its accuracy to state-of-the-art methods using human pangenome graphs from the Human Pangenome Reference Consortium. This reduces small variant genotyping errors by four times relative to the Genome Analysis Toolkit and makes short-read structural variant genotyping of known variants competitive with long-read variant discovery methods.}, } @article {pmid39259908, year = {2024}, author = {Thorgersen, MP and Goff, JL and Trotter, VV and Poole, FL and Arkin, AP and Deutschbauer, AM and Adams, MWW}, title = {Fitness factors impacting survival of a subsurface bacterium in contaminated groundwater.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1093/ismejo/wrae176}, pmid = {39259908}, issn = {1751-7370}, abstract = {Many factors contribute to the ability of a microbial species to persist when encountering complexly contaminated environments including time of exposure, the nature and concentration of contaminants, availability of nutritional resources, and possession of a combination of appropriate molecular mechanisms needed for survival. 
Herein we sought to identify genes that are most important for survival of Gram-negative Enterobacteriaceae in contaminated groundwater environments containing high concentrations of nitrate and metals using the metal-tolerant Oak Ridge Reservation (ORR) isolate, Pantoea sp. MT58 (MT58). Survival fitness experiments in which a randomly barcoded transposon insertion (RB-TnSeq) library of MT58 was exposed directly to contaminated ORR groundwater samples from across a nitrate and mixed metal contamination plume were used to identify genes important for survival with increasing exposure times and concentrations of contaminants, and availability of a carbon source. Genes involved in controlling and using carbon, encoding transcriptional regulators, and related to Gram-negative outer membrane processes were among those found to be important for survival in contaminated ORR groundwater. A comparative genomics analysis of 75 Pantoea genus strains allowed us to further separate the survival determinants into core and non-core genes in the Pantoea pangenome, revealing insights into the survival of subsurface microorganisms during contaminant plume intrusion.}, } @article {pmid39257004, year = {2024}, author = {Liu, Z and Yang, F and Wan, H and Deng, C and Hu, W and Fan, X and Wang, J and Yang, M and Feng, J and Wang, Q and Yang, N and Cai, L and Liu, Y and Tang, H and Li, S and Luo, J and Zheng, J and Wu, L and Yang, E and Pu, Z and Jia, J and Li, J and Yang, W}, title = {Genome architecture of the allotetraploid wild grass Aegilops ventricosa reveals its evolutionary history and contributions to wheat improvement.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101131}, doi = {10.1016/j.xplc.2024.101131}, pmid = {39257004}, issn = {2590-3462}, abstract = {The allotetraploid wild grass Aegilops ventricosa (2n=4X=28, genome D[v]D[v]N[v]N[v]) has been recognized as an important germplasm resource for wheat improvement due to its ability to tolerate 
biotic stresses. In particular, the 2N[v]S segment from Aegilops ventricosa, as a stable and effective resistance source, has greatly contributed to wheat improvement. The 2N[v]S/2AS translocation is a prevalent chromosomal translocation between common wheat and wild relatives, ranking just behind the 1B/1R translocation in importance for modern wheat breeding. Here, we assembled a high-quality chromosome-level reference genome of Ae. ventricosa RM271 with a total length of 8.67 Gb. Phylogenomic analyses revealed that the progenitor of the D[v] subgenome of Ae. ventricosa was Ae. tauschii ssp. tauschii (genome DD); in contrast, the progenitor of the D subgenome of bread wheat (Triticum aestivum L.) was Ae. tauschii ssp. strangulata (genome DD). The oldest polyploidization time of Ae. ventricosa occurred ∼0.7 million years ago. The D[v] subgenome of Ae. ventricosa was less conserved than the D subgenome of bread wheat. Construction of a graph-based pangenome of 2AS/6N[v]L (originally known as 2N[v]S) segments from Ae. ventricosa and other genomes in the Triticeae enables us to identify candidate resistance genes sourced from Ae. ventricosa. We identified 12 nonredundant introgressed segments from the D[v] and N[v] subgenomes using a large winter wheat collection representing the full diversity of the wheat European genetic pool, and 29.40% of European wheat varieties inherited at least one of these segments. The high-quality RM271 reference genome will provide a basis for cloning key genes, including the Yr17-Lr37-Sr38-Cre5 resistance gene cluster in Ae. 
ventricosa, and facilitate the full use of elite wild genetic resources to accelerate wheat improvement.}, } @article {pmid39256695, year = {2024}, author = {Li, X and Huo, L and Li, X and Zhang, C and Gu, M and Fan, J and Xu, C and Gong, J and Hu, X and Zheng, Y and Sun, X}, title = {Genomes of diverse Actinidia species provide insights into cis-regulatory motifs and genes associated with critical traits.}, journal = {BMC biology}, volume = {22}, number = {1}, pages = {200}, pmid = {39256695}, issn = {1741-7007}, support = {LR23C150001//Zhejiang Provincial Natural Science Foundation of China/ ; }, abstract = {BACKGROUND: Kiwifruit, belonging to the genus Actinidia, represents a unique fruit crop characterized by its modern cultivars being genetically diverse and exhibiting remarkable variations in morphological traits and adaptability to harsh environments. However, the genetic mechanisms underlying such morphological diversity remain largely elusive.

RESULTS: We report the high-quality genomes of five Actinidia species, including Actinidia longicarpa, A. macrosperma, A. polygama, A. reticulata, and A. rufa. Through comparative genomics analyses, we identified three whole genome duplication events shared by the Actinidia genus and uncovered rapidly evolving gene families implicated in the development of characteristic kiwifruit traits, including vitamin C (VC) content and fruit hairiness. A range of structural variations were identified, potentially contributing to the phenotypic diversity in kiwifruit. Notably, phylogenomic analyses revealed 76 cis-regulatory elements within the Actinidia genus, predominantly associated with stress responses, metabolic processes, and development. Among these, five motifs did not exhibit similarity to known plant motifs, suggesting the presence of possible novel cis-regulatory elements in kiwifruit. Construction of a pan-genome encompassing the nine Actinidia species facilitated the identification of gene DTZ79_23g14810 specific to species exhibiting extraordinarily high VC content. Expression of DTZ79_23g14810 is significantly correlated with the dynamics of VC concentration, and its overexpression in the transgenic roots of kiwifruit plants resulted in increased VC content.

CONCLUSIONS: Collectively, the genomes and pan-genome of diverse Actinidia species not only enhance our understanding of fruit development but also provide a valuable genomic resource for facilitating the genome-based breeding of kiwifruit.}, } @article {pmid39253572, year = {2024}, author = {Duan, S and Yan, L and Shen, Z and Li, X and Chen, B and Li, D and Qin, H and Meegahakumbura, MK and Wambulwa, MC and Gao, L and Chen, W and Dong, Y and Sheng, J}, title = {Genomic analyses of agronomic traits in tea plants and related Camellia species.}, journal = {Frontiers in plant science}, volume = {15}, number = {}, pages = {1449006}, doi = {10.3389/fpls.2024.1449006}, pmid = {39253572}, issn = {1664-462X}, abstract = {The genus Camellia contains three types of domesticates that meet various needs of ancient humans: the ornamental C. japonica, the edible oil-producing C. oleifera, and the beverage-purposed tea plant C. sinensis. The genomic drivers of the functional diversification of Camellia domesticates remain unknown. Here, we present the genomic variations of 625 Camellia accessions based on a new genome assembly of C. sinensis var. assamica ('YK10'), which consists of 15 pseudo-chromosomes with a total length of 3.35 Gb and a contig N50 of 816,948 bp. These accessions were mainly distributed in East Asia, South Asia, Southeast Asia, and Africa. We profiled the population and subpopulation structure in tea tree Camellia to find new evidence for the parallel domestication of C. sinensis var. assamica (CSA) and C. sinensis var. sinensis (CSS). We also identified candidate genes associated with traits differentiating CSA, CSS, oilseed Camellia, and ornamental Camellia cultivars. 
Our results provide a unique global view of the genetic diversification of Camellia domesticates and provide valuable resources for ongoing functional and molecular breeding research.}, } @article {pmid39252931, year = {2024}, author = {Stanley, S and Silva-Costa, C and Gomes-Silva, J and Melo-Cristino, J and Malley, R and Ramirez, M}, title = {CC180 clade dynamics does not universally explain Streptococcus pneumoniae serotype 3 persistence post-vaccine: a global comparative population genomics study.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.08.29.24312665}, pmid = {39252931}, abstract = {BACKGROUND: Clonal complex 180 (CC180) is currently the major clone of serotype 3 Streptococcus pneumoniae (Spn). The 13-valent pneumococcal conjugate vaccine (PCV13) does not have significant efficacy against serotype 3 despite polysaccharide inclusion in the vaccine. It was hypothesized that PCV13 may effectively control Clade I of CC180 but that Clades III and IV are resistant, provoking a population shift that enables serotype 3 persistence. This has been observed in the United States, England, and Wales but not Spain. We tested this hypothesis further utilizing a dataset from Portugal.

METHODS: We whole-genome sequenced (WGS) 501 serotype 3 strains from Portugal isolated from patients with pneumococcal infections between 1999-2020. The draft genomes underwent phylogenetic analyses, pangenome profiling, and a genome-wide association study (GWAS). We also completed antibiotic susceptibility testing and compiled over 2,600 serotype 3 multilocus sequence type 180 (MLST180) WGSs to perform global comparative genomics.

FINDINGS: CC180 Clades I, II, III, IV, and VI distributions were similar when comparing non-invasive pneumonia isolates and invasive disease isolates (Fisher's exact test, P=0.29), and adult and pediatric cases (Fisher's exact test, P=0.074). The serotype 3 CCs shifted post-PCV13 (Fisher's exact test, P<0.0001) and Clade I became dominant. Clade I is largely antibiotic-sensitive and carries the ΦOXC141 prophage but the pangenome is heterogeneous. Strains from Portugal and Spain, where Clade I remains dominant post-PCV13, have larger pangenomes and are associated with the presence of two genes encoding hypothetical proteins.

INTERPRETATION: Clade I became dominant in Portugal post-PCV13, despite the burden of the prophage and antibiotic sensitivity. The accessory genome content may mitigate these fitness costs. Regional differences in Clade I prevalence and pangenome heterogeneity suggest that clade dynamics is not a generalizable approach to understanding serotype 3 vaccine escape.

FUNDING: National Institute of Child Health and Human Development, Pfizer, and Merck Sharp & Dohme.

RESEARCH IN CONTEXT: Evidence before this study: We conducted this study because of the mounting interest surrounding the changing prevalence of serotype 3 Streptococcus pneumoniae (Spn) genetic lineages and the potential association with escape from 13-valent pneumococcal conjugate vaccine (PCV13) control. To inform our investigation, we searched the PubMed database using different combinations of the following keywords: "Streptococcus pneumoniae", "serotype 3", "CC180", "PCV13", "Clade Iα", "Clade Iβ", and "Clade II". The search included all English language primary research articles published before July 1st, 2024; this language limitation may bias the results of our assessment. Most ST3 isolates belong to clonal complex 180 (CC180), and one study identified three major lineages within CC180: Clade Iα, Clade Iβ, and Clade II. This study observed a global trend of increasing Clade II prevalence with a concomitant decrease in Clade I prevalence over time, which was associated with the introduction of PCV13 in the United States. A report from England and Wales made a similar observation. It was therefore hypothesized that PCV13 may be effective at controlling Clade Iα and that Clade II is driving vaccine escape. Later work refined the clade classification system as follows: Clade I (Clade Iα), Clades II and VI (Clade Iβ), Clades III and IV (Clade II), and Clade V. Clade I strains are marked by a significantly lower recombination rate partly due to the presence of a lineage-specific prophage interfering with competence development, which is a potential mechanism explaining the possible reduced fitness of Clade I. Clade I is also noted to be mostly antibiotic-susceptible. However, a recent study found that Clade I persists as a dominant serotype 3 lineage in Spain, so the generalizability and implications of clade dynamics remain unclear. 
Added value of this study: Early work assessing the association between changes in serotype 3 clade prevalence and PCV13 was limited by small sample sizes. In addition, studies investigating differences in clade dynamics did not comprehensively consider patient age or disease manifestations such as non-invasive pneumonia and invasive infections. In this study, we evaluated 501 serotype 3 strains from Portugal to investigate clade dynamics. This must be explored in different geographic contexts for a more robust understanding of changing serotype 3 population genomics. We also sought to define genetic determinants linked to strains from regions in which Clade I remains dominant. This is an important step towards a more mechanistic understanding of the serotype 3 CC180 lineage fitness landscape. Implications of all the available evidence: Unlike other serotypes covered by PCV13, serotype 3 has evaded vaccine control. It has been suggested that Clade I prevalence has decreased due to PCV13, which has created an expanded niche for strains from other clades and ultimately renders PCV13 less effective against serotype 3. This postulation has important implications for the future design of an improved vaccine, so this hypothesis must be thoroughly tested in diverse contexts. We find that Clade I remains the dominant lineage in Portugal even after the introduction of PCV13. 
We delineate Clade I pangenome heterogeneity and show that strains from Portugal and Spain share similar pangenome features in contrast to Clade I strains from regions where Clade I decreased in prevalence, which should motivate future studies to elucidate more generalizable population genomics trends that may better inform strategies for the design of an improved vaccine.}, } @article {pmid39251928, year = {2024}, author = {Zorigt, T and Furuta, Y and Paudel, A and Kamboyi, HK and Shawa, M and Chuluun, M and Sugawara, M and Enkhtsetseg, N and Enkhtuya, J and Battsetseg, B and Munyeme, M and Hang'ombe, BM and Higashi, H}, title = {Pan-genome analysis reveals novel chromosomal markers for multiplex PCR-based specific detection of Bacillus anthracis.}, journal = {BMC infectious diseases}, volume = {24}, number = {1}, pages = {942}, pmid = {39251928}, issn = {1471-2334}, support = {23K19460//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; 21K15430//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; 18K19436//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; JP23wm0125008//The Japan Program for Infectious Diseases Research and Infrastructure (JIDRI) from the Japan Agency for Medical Research and Development (AMED)/ ; }, abstract = {BACKGROUND: Bacillus anthracis is a highly pathogenic bacterium that can cause lethal infection in animals and humans, making it a significant concern as a pathogen and biological agent. Consequently, accurate diagnosis of B. anthracis is critically important for public health. However, the identification of specific marker genes encoded in the B. anthracis chromosome is challenging due to the genetic similarity it shares with B. cereus and B. thuringiensis.

METHODS: The complete genomes of B. anthracis, B. cereus, B. thuringiensis, and B. weihenstephanensis were de novo annotated with Prokka, and these annotations were used by Roary to produce the pan-genome. B. anthracis exclusive genes were identified by Perl script, and their specificity was examined by nucleotide BLAST search. A local BLAST alignment was performed to confirm the presence of the identified genes across various B. anthracis strains. Multiplex polymerase chain reactions (PCR) were established based on the identified genes.

RESULT: The distribution of genes among 151 whole-genome sequences exhibited three distinct major patterns, depending on the bacterial species and strains. Further comparative analysis between the three groups uncovered thirty chromosome-encoded genes exclusively present in B. anthracis strains. Of these, twenty were found in known lambda prophage regions, and ten were in a previously undefined region of the chromosome. We established three distinct multiplex PCRs for the specific detection of B. anthracis by utilizing three of the identified genes, BA1698, BA5354, and BA5361.

CONCLUSION: The study identified thirty chromosome-encoded genes specific to B. anthracis, encompassing previously described genes in known lambda prophage regions and nine newly discovered genes from an undefined gene region to the best of our knowledge. Three multiplex PCR assays offer an accurate and reliable alternative method for detecting B. anthracis. Furthermore, these genetic markers have value in anthrax vaccine development, and understanding the pathogenicity of B. anthracis.}, } @article {pmid39251347, year = {2024}, author = {Ou, S and Scheben, A and Collins, T and Qiu, Y and Seetharam, AS and Menard, CC and Manchanda, N and Gent, JI and Schatz, MC and Anderson, SN and Hufford, MB and Hirsch, CN}, title = {Differences in activity and stability drive transposable element variation in tropical and temperate maize.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278131.123}, pmid = {39251347}, issn = {1549-5469}, abstract = {Much of the profound interspecific variation in genome content has been attributed to transposable elements (TEs). To explore the extent of TE variation within species, we developed an optimized open-source algorithm, panEDTA, to de novo annotate TEs in a pangenome context. We then generated a unified TE annotation for a maize pangenome derived from 26 reference-quality genomes, which reveals an excess of 35.1 Mb of TE sequences per genome in tropical maize relative to temperate maize. A small number (n = 216) of TE families, mainly LTR retrotransposons, drive these differences. Evidence from the methylome, transcriptome, LTR age distribution, and LTR insertional polymorphisms reveals that 64.7% of the variability is contributed by LTR families that are young, less methylated, and more expressed in tropical maize, whereas 18.5% is driven by LTR families with removal or loss in temperate maize. 
Additionally, we find enrichment for Young LTR families adjacent to nucleotide-binding and leucine-rich repeat (NLR) clusters of varying copy number across lines, suggesting TE activity may be associated with disease resistance in maize.}, } @article {pmid39251346, year = {2024}, author = {Hung, TK and Liu, WC and Lai, SK and Chuang, HW and Lee, YC and Lin, HY and Hsu, CL and Chen, CY and Yang, YC and Hsu, JS and Chen, PL}, title = {Genetic complexity of killer-cell immunoglobulin-like receptor genes in human pangenome assemblies.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278358.123}, pmid = {39251346}, issn = {1549-5469}, abstract = {The killer-cell immunoglobulin-like receptor (KIR) gene complex, a highly polymorphic region of the human genome that encodes proteins involved in immune responses, poses strong challenges in genotyping owing to its remarkable genetic diversity and structural intricacy. Accurate analysis of KIR alleles, including their structural variations, is crucial for understanding their roles in various immune responses. Leveraging the high-quality genome assemblies from the Human Pangenome Reference Consortium (HPRC), we present a novel bioinformatic tool, the structural KIR annoTator (SKIRT), to investigate gene diversity and facilitate precise KIR allele analysis. In 47 HPRC-phased assemblies, SKIRT identifies a recurrent novel KIR2DS4/3DL1 fusion gene in the paternal haplotype of HG02630 and maternal haplotype of NA19240. Additionally, SKIRT accurately identifies eight structural variants and 15 novel nonsynonymous alleles, all of which are independently validated using short-read data or quantitative polymerase chain reaction. 
Our study has discovered a total of 570 novel alleles, among which eight haplotypes harbor at least one KIR gene duplication, six haplotypes have lost at least one framework gene, and 75 out of 94 haplotypes (79.8%) carry at least five novel alleles, thus confirming KIR genetic diversity. These findings are pivotal in providing insights into KIR gene diversity and serve as a solid foundation for understanding the functional consequences of KIR structural variations. High-resolution genome assemblies offer unprecedented opportunities to explore polymorphic regions that are challenging to investigate using short-read sequencing methods. The SKIRT pipeline emerges as a highly efficient tool, enabling the comprehensive detection of the complete spectrum of KIR alleles within human genome assemblies.}, } @article {pmid39245770, year = {2024}, author = {Kenneally, C and Murphy, CP and Sleator, RD and Culligan, EP}, title = {Genotypic and phenotypic characterisation of asymptomatic bacteriuria (ABU) isolates displaying bacterial interference against multi-drug resistant uropathogenic E. Coli.}, journal = {Archives of microbiology}, volume = {206}, number = {10}, pages = {394}, pmid = {39245770}, issn = {1432-072X}, mesh = {Humans ; *Bacteriuria/microbiology ; *Uropathogenic Escherichia coli/genetics/drug effects/isolation & purification/classification ; *Escherichia coli Infections/microbiology ; *Drug Resistance, Multiple, Bacterial/genetics ; *Genotype ; *Phenotype ; *Urinary Tract Infections/microbiology ; *Anti-Bacterial Agents/pharmacology ; Virulence/genetics ; Phylogeny ; Adult ; Virulence Factors/genetics ; Genome, Bacterial ; Microbial Sensitivity Tests ; }, abstract = {Escherichia coli can colonise the urogenital tract of individuals without causing symptoms of infection, in a condition referred to as asymptomatic bacteriuria (ABU). 
ABU isolates can protect the host against symptomatic urinary tract infections (UTIs) by bacterial interference against uropathogenic E. coli (UPEC). The aim of this study was to investigate the genotypic and phenotypic characteristics of five ABU isolates from midstream urine samples of adults. Comparative genomic and phenotypic analysis was conducted including an antibiotic resistance profile, pangenome analysis, and a putative virulence profile. Based on the genome analysis, the isolates consisted of one from phylogroup A, three from phylogroup B2, and one from phylogroup D. Two of the isolates, PUTS 58 and SK-106-1, were noted for their lack of antibiotic resistance and virulence genes compared to the prototypic ABU strain E. coli 83972. This study provides insights into the genotypic and phenotypic profiles of uncharacterised ABU isolates, and how relevant fitness and virulence traits can impact their potential suitability for therapeutic bacterial interference.}, } @article {pmid39244587, year = {2024}, author = {Campbell, AM and Gavilan, RG and Abanto Marin, M and Yang, C and Hauton, C and van Aerle, R and Martinez-Urtaza, J}, title = {Evolutionary dynamics of the successful expansion of pandemic Vibrio parahaemolyticus ST3 in Latin America.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {7828}, pmid = {39244587}, issn = {2041-1723}, support = {2021 SGR 00526//Generalitat de Catalunya (Government of Catalonia)/ ; NE/S007210/1//RCUK | Natural Environment Research Council (NERC)/ ; }, mesh = {*Vibrio parahaemolyticus/genetics/isolation & purification/classification ; Latin America/epidemiology ; *Evolution, Molecular ; *Vibrio Infections/epidemiology/microbiology ; Humans ; *Phylogeny ; Genome, Bacterial/genetics ; Pandemics ; Mutation ; }, abstract = {The underlying evolutionary mechanisms driving global expansions of pathogen strains are poorly understood. 
Vibrio parahaemolyticus is one of only two marine pathogens where variants have emerged in distinct climates globally. The success of a Vibrio parahaemolyticus clone (VpST3) in Latin America—the first spread identified outside its endemic region of tropical Asia—provided an invaluable opportunity to investigate mechanisms of VpST3 expansion into a distinct marine climate. A global collection of VpST3 isolates and novel Latin American isolates were used for evolutionary population genomics, pangenome analysis and combined with oceanic climate data. We found a VpST3 population (LatAm-VpST3) introduced in Latin America well before the emergence of this clone in India, previously considered the onset of the VpST3 epidemic. LatAm-VpST3 underwent successful adaptation to local conditions over its evolutionary divergence from Asian VpST3 isolates, to become dominant in Latin America. Selection signatures were found in genes providing resilience to the distinct marine climate. Core genome mutations and accessory gene presences that promoted survival over long dispersals or increased environmental fitness were associated with environmental conditions. These results provide novel insights into the global expansion of this successful V. 
parahaemolyticus clone into regions with different climate scenarios.}, } @article {pmid39243017, year = {2024}, author = {Kim, HS and Haley, OC and Portwood, II, JL and Harding, S and Proctor, RH and Woodhouse, MR and Sen, TZ and Andorf, CM}, title = {Fusarium Protein Toolkit: a web-based resource for structural and variant analysis of Fusarium species.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {326}, pmid = {39243017}, issn = {1471-2180}, support = {5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 0201-88888-003-000D and 0201-88888-002-000D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; 5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; 2030-21000-056-000-D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; }, mesh = {*Fusarium/genetics/metabolism/classification ; *Fungal Proteins/genetics/chemistry/metabolism ; *Internet ; Genome, Fungal/genetics ; Genetic Variation ; Models, Molecular ; Software ; Protein Conformation ; }, abstract = {BACKGROUND: The genus Fusarium poses significant threats to food security and safety worldwide because numerous species of the fungus cause destructive diseases and/or mycotoxin contamination in crops. The adverse effects of climate change are exacerbating some existing threats and causing new problems. These challenges highlight the need for innovative solutions, including the development of advanced tools to identify targets for control strategies.

DESCRIPTION: In response to these challenges, we developed the Fusarium Protein Toolkit (FPT), a web-based tool that allows users to interrogate the structural and variant landscape within the Fusarium pan-genome. The tool displays both AlphaFold and ESMFold-generated protein structure models from six Fusarium species. The structures are accessible through a user-friendly web portal and facilitate comparative analysis, functional annotation inference, and identification of related protein structures. Using a protein language model, FPT predicts the impact of over 270 million coding variants in two of the most agriculturally important species, Fusarium graminearum and F. verticillioides. To facilitate the assessment of naturally occurring genetic variation, FPT provides variant effect scores for proteins in a Fusarium pan-genome based on 22 diverse species. The scores indicate potential functional consequences of amino acid substitutions and are displayed as intuitive heatmaps using the PanEffect framework.

CONCLUSION: FPT fills a knowledge gap by providing previously unavailable tools to assess structural and missense variation in proteins produced by Fusarium. FPT has the potential to deepen our understanding of pathogenic mechanisms in Fusarium, and aid the identification of genetic targets for control strategies that reduce crop diseases and mycotoxin contamination. Such targets are vital to solving the agricultural problems incited by Fusarium, particularly evolving threats resulting from climate change. Thus, FPT has the potential to contribute to improving food security and safety worldwide.}, } @article {pmid39242972, year = {2024}, author = {Masignani, V and Rappuoli, R and Pizza, M}, title = {Next generation of "magic bullets", solutions from the microbial pangenome.}, journal = {EMBO molecular medicine}, volume = {}, number = {}, pages = {}, pmid = {39242972}, issn = {1757-4684}, } @article {pmid39238887, year = {2024}, author = {Najjari, A and Jabberi, M and Chérif, SF and Cherif, A and Ouzari, HI and Linares-Pastén, JA and Sghaier, H}, title = {Genome and pan-genome analysis of a new exopolysaccharide-producing bacterium Pyschrobacillus sp. isolated from iron ores deposit and insights into iron uptake.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1440081}, pmid = {39238887}, issn = {1664-302X}, abstract = {Bacterial exopolysaccharides (EPS) have emerged as one of the key players in the field of heavy metal-contaminated environmental bioremediation. This study aimed to characterize and evaluate the metal biosorption potential of EPS produced by a novel Psychrobacillus strain, NEAU-3TGS, isolated from an iron ore deposit at Tamra iron mine, northern Tunisia. Genomic and pan-genomic analysis of NEAU-3TGS bacterium with nine validated published Psychrobacillus species was also performed. The results showed that the NEAU-3TGS genome (4.48 Mb) had a mean GC content of 36%, 4,243 coding sequences and 14 RNA genes. 
Phylogenomic analysis and calculation of nucleotide identity (ANI) values (less than 95% for new species with all strains) confirmed that NEAU-3TGS represents a potential new species. Pangenomic analysis revealed that Psychrobacillus genomic diversity represents an "open" pangenome model with 33,091 homologous genes, including 65 core, 3,738 shell, and 29,288 cloud genes. Structural EPS characterization by attenuated total reflectance-Fourier transform infrared (ATR-FTIR) spectroscopy showed uronic acid and α-1,4-glycosidic bonds as dominant components of the EPS. X-ray diffraction (XRD) analysis revealed the presence of chitin, chitosan, and calcite CaCO3 and confirmed the amorphous nature of the EPS. Heavy metal bioabsorption assessment showed that iron and lead were more adsorbed than copper and cadmium. Notably, the optimum activity was observed at 37°C, pH=7 and after 3 h contact of EPS with each metal. Genomic insights on iron acquisition and metabolism in Psychrobacillus sp. NEAU-3TGS suggested that no genes involved in siderophore biosynthesis were found, and only the gene cluster FeuABCD and trilactone hydrolase genes involved in the uptake of siderophores, iron transporter and exporter are present. Molecular modelling and docking of FeuA (protein peptidoglycan siderophore-binding protein) and siderophores ferrienterobactine [Fe[+3] (ENT)][-3] and ferribacillibactine [Fe[+3] (BB)][-3] ligand revealed that [Fe[+3] (ENT)][-3] binds to Phe122, Lys127, Ile100, Gln314, Arg215, Arg217, and Gln252. Almost the same for [Fe[+3] (ENT)][-3] in addition to Cys222 and Tyr229, but not Ile100. To the best of our knowledge, this is the first report on the characterization of EPS and the adsorption of heavy metals by Psychrobacillus species. 
The heavy metal removal capabilities may be advantageous for using these organisms in metal remediation.}, } @article {pmid39237905, year = {2024}, author = {Cheng, R and Zhao, Z and Tang, Y and Gu, Y and Chen, G and Sun, Y and Wang, X}, title = {Genome-wide survey of KT/HAK/KUP genes in the genus Citrullus and analysis of their involvement in K[+]-deficiency and drought stress responses in between C. lanatus and C. amarus.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {836}, pmid = {39237905}, issn = {1471-2164}, support = {HABL202228//Huai'an Natural Science Research Project/ ; JBGS[2021]072//Seed Industry Vitalization Research Projects of Jiangsu Province/ ; CARS-25//China Agriculture Research System of MOF and MARA/ ; }, mesh = {*Citrullus/genetics/metabolism/growth & development ; *Stress, Physiological/genetics ; *Plant Proteins/genetics/metabolism ; *Droughts ; *Phylogeny ; Potassium/metabolism ; Gene Expression Regulation, Plant ; Genome, Plant ; Multigene Family ; Cation Transport Proteins/genetics/metabolism ; Potassium Deficiency/genetics/metabolism ; Promoter Regions, Genetic ; }, abstract = {BACKGROUND: The KT/HAK/KUP is the largest K[+] transporter family in plants, playing crucial roles in K[+] absorption, transport, and defense against environmental stress. Sweet watermelon is an economically significant horticultural crop belonging to the genus Citrullus, with a high demand for K[+] during its growth process. However, a comprehensive analysis of the KT/HAK/KUP gene family in watermelon has not been reported.

RESULTS: 14 KT/HAK/KUP genes were identified in the genomes of each of seven Citrullus species. These KT/HAK/KUPs in watermelon were unevenly distributed across seven chromosomes. Segmental duplication is the primary driving force behind the expansion of the KT/HAK/KUP family, subjected to purifying selection during domestication (Ka/Ks < 1), and all KT/HAK/KUPs exhibit conserved motifs and could be phylogenetically classified into four groups. The promoters of KT/HAK/KUPs contain numerous cis-regulatory elements related to plant growth and development, phytohormone response, and stress response. Under K[+] deficiency, the growth of watermelon seedlings was significantly inhibited, with cultivated watermelon experiencing greater impacts (canopy width, redox enzyme activity) compared to the wild type. All KT/HAK/KUPs in C. lanatus and C. amarus exhibit specific expression responses to K[+]-deficiency and drought stress by qRT-PCR. Notably, ClG42_07g0120700/CaPI482276_07g014010 were predominantly expressed in roots and were further induced by K[+]-deficiency and drought stress. Additionally, the K[+] transport capacity of ClG42_07g0120700 under low K[+] stress was confirmed by yeast functional complementation assay.

CONCLUSIONS: KT/HAK/KUP genes in watermelon were systematically identified and analyzed at the pangenome level and provide a foundation for understanding the classification and functions of the KT/HAK/KUPs in watermelon plants.}, } @article {pmid39235714, year = {2024}, author = {de Oliva, BHD and do Nascimento, AB and de Oliveira, JP and Guidone, GHM and Schoeps, BL and Silva, LC and Barbosa, MGL and Montini, VH and de Oliveira Junior, AG and Rocha, SPD}, title = {Genomic insights into a Proteus mirabilis strain inducing avian cellulitis.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {}, number = {}, pages = {}, pmid = {39235714}, issn = {1678-4405}, support = {Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; }, abstract = {Proteus mirabilis, a microorganism distributed in soil, water, and animals, is clinically known for causing urinary tract infections in humans. However, recent studies have linked it to skin infections in broiler chickens, termed avian cellulitis, which poses a threat to animal welfare. While Avian Pathogenic Escherichia coli (APEC) is the primary cause of avian cellulitis, few cases of P. mirabilis involvement are reported, raising questions about the factors facilitating such occurrences. This study employed a pan-genomic approach to investigate whether unique genes exist in P. mirabilis strains causing avian cellulitis. The genome of LBUEL-A33, a P. mirabilis strain known to cause this infection, was assembled, and compared with other P. mirabilis strains isolated from poultry and other sources. Additionally, in silico serogroup analysis was conducted. Results revealed numerous genes unique to the LBUEL-A33 strain. No function in cellulitis was identified for these genes, and in silico investigation of the virulence potential of LBUEL-A33's exclusive proteins proved inconclusive. 
These findings support that multiple factors are necessary for P. mirabilis to cause avian cellulitis. Furthermore, this species likely employs its own unique arsenal of virulence factors, as many identified mechanisms are analogous to those of E. coli. While antigenic gene clusters responsible for serogroups were identified, no clear trend was observed, and the gene cluster of LBUEL-A33 did not show homology with any sequenced Proteus serogroups. These results reinforce the understanding that this disease is multifactorial, necessitating further research to unravel the mechanisms and underpin the development of control and prevention strategies.}, } @article {pmid38736416, year = {2024}, author = {Brandenburg, JM and Stapleton, GS and Kline, KE and Khoury, J and Mallory, K and Machesky, KD and Ladd-Wilson, SG and Scholz, R and Freiman, J and Schwensohn, C and Palacios, A and Gieraltowski, L and Ellison, Z and Tolar, B and Webb, HE and Tagg, KA and Salah, Z and Nichols, M}, title = {Salmonella Hadar linked to two distinct transmission vehicles highlights challenges to enteric disease outbreak investigations.}, journal = {Epidemiology and infection}, volume = {152}, number = {}, pages = {e86}, doi = {10.1017/S0950268824000682}, pmid = {38736416}, issn = {1469-4409}, mesh = {*Disease Outbreaks ; Animals ; *Salmonella/genetics/classification/isolation & purification ; *Turkeys/microbiology ; *Poultry Diseases/epidemiology/microbiology/transmission ; Humans ; Salmonella Infections, Animal/epidemiology/transmission/microbiology ; Chickens/microbiology ; Multilocus Sequence Typing ; Ducks/microbiology ; Poultry/microbiology ; Salmonella Infections/epidemiology/transmission/microbiology ; }, abstract = {In 2020, an outbreak of Salmonella Hadar illnesses was linked to contact with non-commercial, privately owned (backyard) poultry including live chickens, turkeys, and ducks, resulting in 848 illnesses. 
From late 2020 to 2021, this Salmonella Hadar strain caused an outbreak that was linked to ground turkey consumption. Core genome multilocus sequence typing (cgMLST) analysis determined that the Salmonella Hadar isolates detected during the outbreak linked to backyard poultry and the outbreak linked to ground turkey were closely related genetically (within 0-16 alleles). Epidemiological and traceback investigations were unable to determine how Salmonella Hadar detected in backyard poultry and ground turkey were linked, despite this genetic relatedness. Enhanced molecular characterization methods, such as analysis of the pangenome of Salmonella isolates, might be necessary to understand the relationship between these two outbreaks. Similarly, enhanced data collection during outbreak investigations and further research could potentially aid in determining whether these transmission vehicles are truly linked by a common source and what reservoirs exist across the poultry industries that allow Salmonella Hadar to persist. Further work combining epidemiological data collection, more detailed traceback information, and genomic analysis tools will be important for monitoring and investigating future enteric disease outbreaks.}, } @article {pmid39232174, year = {2024}, author = {Bolognini, D and Halgren, A and Lou, RN and Raveane, A and Rocha, JL and Guarracino, A and Soranzo, N and Chin, CS and Garrison, E and Sudmant, PH}, title = {Recurrent evolution and selection shape structural diversity at the amylase locus.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {39232174}, issn = {1476-4687}, abstract = {The adoption of agriculture triggered a rapid shift towards starch-rich diets in human populations[1]. Amylase genes facilitate starch digestion, and increased amylase copy number has been observed in some modern human populations with high-starch intake[2], although evidence of recent selection is lacking[3,4]. 
Here, using 94 long-read haplotype-resolved assemblies and short-read data from approximately 5,600 contemporary and ancient humans, we resolve the diversity and evolutionary history of structural variation at the amylase locus. We find that amylase genes have higher copy numbers in agricultural populations than in fishing, hunting and pastoral populations. We identify 28 distinct amylase structural architectures and demonstrate that nearly identical structures have arisen recurrently on different haplotype backgrounds throughout recent human history. AMY1 and AMY2A genes each underwent multiple duplication/deletion events with mutation rates up to more than 10,000-fold the single-nucleotide polymorphism mutation rate, whereas AMY2B gene duplications share a single origin. Using a pangenome-based approach, we infer structural haplotypes across thousands of humans identifying extensively duplicated haplotypes at higher frequency in modern agricultural populations. Leveraging 533 ancient human genomes, we find that duplication-containing haplotypes (with more gene copies than the ancestral haplotype) have rapidly increased in frequency over the past 12,000 years in West Eurasians, suggestive of positive selection. 
Together, our study highlights the potential effects of the agricultural revolution on human genomes and the importance of structural variation in human adaptation.}, } @article {pmid39232082, year = {2024}, author = {Rinker, DC and Sauters, TJC and Steffen, K and Gumilang, A and Raja, HA and Rangel-Grimaldo, M and Pinzan, CF and de Castro, PA and Dos Reis, TF and Delbaje, E and Houbraken, J and Goldman, GH and Oberlies, NH and Rokas, A}, title = {Strain heterogeneity in a non-pathogenic Aspergillus fungus highlights factors associated with virulence.}, journal = {Communications biology}, volume = {7}, number = {1}, pages = {1082}, pmid = {39232082}, issn = {2399-3642}, support = {R01 AI153356/AI/NIAID NIH HHS/United States ; DEB-2110404//National Science Foundation (NSF)/ ; }, mesh = {Animals ; Virulence ; *Aspergillus/pathogenicity/genetics/metabolism ; Mice ; Gliotoxin/metabolism ; Disease Models, Animal ; Pulmonary Aspergillosis/microbiology ; Female ; Genome, Fungal ; }, abstract = {Fungal pathogens exhibit extensive strain heterogeneity, including variation in virulence. Whether closely related non-pathogenic species also exhibit strain heterogeneity remains unknown. Here, we comprehensively characterized the pathogenic potentials (i.e., the ability to cause morbidity and mortality) of 16 diverse strains of Aspergillus fischeri, a non-pathogenic close relative of the major pathogen Aspergillus fumigatus. In vitro immune response assays and in vivo virulence assays using a mouse model of pulmonary aspergillosis showed that A. fischeri strains varied widely in their pathogenic potential. Furthermore, pangenome analyses suggest that A. fischeri genomic and phenotypic diversity is even greater. Genomic, transcriptomic, and metabolic profiling identified several pathways and secondary metabolites associated with variation in virulence. 
Notably, strain virulence was associated with the simultaneous presence of the secondary metabolites hexadehydroastechrome and gliotoxin. We submit that examining the pathogenic potentials of non-pathogenic close relatives is key for understanding the origins of fungal pathogenicity.}, } @article {pmid39232008, year = {2024}, author = {Veseli, I and DeMers, MA and Cooper, ZS and Schechter, MS and Miller, S and Weber, L and Smith, CB and Rodriguez, LT and Schroer, WF and McIlvin, MR and Lopez, PZ and Saito, M and Dyhrman, S and Eren, AM and Moran, MA and Braakman, R}, title = {Digital Microbe: a genome-informed data integration framework for team science on emerging model organisms.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {967}, pmid = {39232008}, issn = {2052-4463}, support = {1746045//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; 542391//Simons Foundation/ ; }, mesh = {*Genome, Bacterial ; Genomics ; Software ; Flavobacteriaceae/genetics ; }, abstract = {The remarkable pace of genomic data generation is rapidly transforming our understanding of life at the micron scale. Yet this data stream also creates challenges for team science. 
A single microbe can have multiple versions of genome architecture, functional gene annotations, and gene identifiers; additionally, the lack of mechanisms for collating and preserving advances in this knowledge raises barriers to community coalescence around shared datasets. "Digital Microbes" are frameworks for interoperable and reproducible collaborative science through open source, community-curated data packages built on a (pan)genomic foundation. Housed within an integrative software environment, Digital Microbes ensure real-time alignment of research efforts for collaborative teams and facilitate novel scientific insights as new layers of data are added. Here we describe two Digital Microbes: 1) the heterotrophic marine bacterium Ruegeria pomeroyi DSS-3 with > 100 transcriptomic datasets from lab and field studies, and 2) the pangenome of the cosmopolitan marine heterotroph Alteromonas containing 339 genomes. Examples demonstrate how an integrated framework collating public (pan)genome-informed data can generate novel and reproducible findings.}, } @article {pmid39228856, year = {2024}, author = {Fan, X and Chen, L and Chen, M and Zhang, N and Chang, H and He, M and Shen, Z and Zhang, L and Ding, H and Xie, Y and Huang, Y and Ke, W and Xiao, M and Zang, X and Xu, H and Fang, W and Li, S and Cao, C and Xu, Y and Shan, S and Wu, W and Chen, C and Xue, X and Wang, L}, title = {Pan-omics-based characterization and prediction of highly multidrug-adapted strains from an outbreak fungal species complex.}, journal = {Innovation (Cambridge (Mass.))}, volume = {5}, number = {5}, pages = {100681}, pmid = {39228856}, issn = {2666-6758}, abstract = {Strains from the Cryptococcus gattii species complex (CGSC) have caused the Pacific Northwest cryptococcosis outbreak, the largest cluster of life-threatening fungal infections in otherwise healthy human hosts known to date. 
In this study, we utilized a pan-phenome-based method to assess the fitness outcomes of CGSC strains under 31 stress conditions, providing a comprehensive overview of 2,821 phenotype-strain associations within this pathogenic clade. Phenotypic clustering analysis revealed a strong correlation between distinct types of stress phenotypes in a subset of CGSC strains, suggesting that shared determinants coordinate their adaptations to various stresses. Notably, a specific group of strains, including the outbreak isolates, exhibited a remarkable ability to adapt to all three of the most commonly used antifungal drugs for treating cryptococcosis (amphotericin B, 5-fluorocytosine, and fluconazole). By integrating pan-genomic and pan-transcriptomic analyses, we identified previously unrecognized genes that play crucial roles in conferring multidrug resistance in an outbreak strain with high multidrug adaptation. From these genes, we identified biomarkers that enable the accurate prediction of highly multidrug-adapted CGSC strains, achieving maximum accuracy and area under the curve (AUC) of 0.79 and 0.86, respectively, using machine learning algorithms. Overall, we developed a pan-omic approach to identify cryptococcal multidrug resistance determinants and predict highly multidrug-adapted CGSC strains that may pose significant clinical concern.}, } @article {pmid39228791, year = {2024}, author = {Do, VH and Nguyen, VS and Nguyen, SH and Le, DQ and Nguyen, TT and Nguyen, CH and Ho, TH and Vo, NS and Nguyen, T and Nguyen, HA and Cao, MD}, title = {PanKA: Leveraging population pangenome to predict antibiotic resistance.}, journal = {iScience}, volume = {27}, number = {9}, pages = {110623}, pmid = {39228791}, issn = {2589-0042}, abstract = {Machine learning has the potential to be a powerful tool in the fight against antimicrobial resistance (AMR), a critical global health issue. 
Machine learning can identify resistance mechanisms from DNA sequence data without prior knowledge. The first step in building a machine learning model is a feature extraction from sequencing data. Traditional methods like single nucleotide polymorphism (SNP) calling and k-mer counting yield numerous, often redundant features, complicating prediction and analysis. In this paper, we propose PanKA, a method using the pangenome to extract a concise set of relevant features for predicting AMR. PanKA not only enables fast model training and prediction but also improves accuracy. Applied to the Escherichia coli and Klebsiella pneumoniae bacterial species, our model is more accurate than conventional and state-of-the-art methods in predicting AMR.}, } @article {pmid39227987, year = {2024}, author = {Bonnici, V and Chicco, D}, title = {Seven quick tips for gene-focused computational pangenomic analysis.}, journal = {BioData mining}, volume = {17}, number = {1}, pages = {28}, pmid = {39227987}, issn = {1756-0381}, support = {InfoLife//CINI (Consorzio Interuniversitario Nazionale per l'Informatica)/ ; Project Age-It (Ageing Well in an Ageing Society)//European Union - Next Generation EU programme/ ; ReGAInS//Ministero dell'Università e della Ricerca of Italy/ ; }, abstract = {Pangenomics is a relatively new scientific field which investigates the union of all the genomes of a clade. The word pan means everything in ancient Greek; the term pangenomics originally regarded genomes of bacteria and was later intended to refer to human genomes as well. Modern bioinformatics offers several tools to analyze pangenomics data, paving the way to an emerging field that we can call computational pangenomics. 
Current computational power available for the bioinformatics community has made computational pangenomic analyses easy to perform, but this higher accessibility to pangenomics analysis also increases the chances to make mistakes and to produce misleading or inflated results, especially by beginners. To handle this problem, we present here a few quick tips for efficient and correct computational pangenomic analyses with a focus on bacterial pangenomics, by describing common mistakes to avoid and experienced best practices to follow in this field. We believe our recommendations can help the readers perform more robust and sound pangenomic analyses and to generate more reliable results.}, } @article {pmid39221271, year = {2024}, author = {Trisakul, K and Hinwan, Y and Eisiri, J and Salao, K and Chaiprasert, A and Kamolwat, P and Tongsima, S and Campino, S and Phelan, J and Clark, TG and Faksri, K}, title = {Comparisons of genome assembly tools for characterization of Mycobacterium tuberculosis genomes using hybrid sequencing technologies.}, journal = {PeerJ}, volume = {12}, number = {}, pages = {e17964}, pmid = {39221271}, issn = {2167-8359}, mesh = {*Mycobacterium tuberculosis/genetics ; *Genome, Bacterial/genetics ; *High-Throughput Nucleotide Sequencing/methods ; Humans ; Polymorphism, Single Nucleotide/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {BACKGROUND: Next-generation sequencing of Mycobacterium tuberculosis, the infectious agent causing tuberculosis, is improving the understanding of genomic diversity of circulating lineages and strain-types, and informing knowledge of drug resistance mutations. An increasingly popular approach to characterizing M. tuberculosis genomes (size: 4.4 Mbp) and variants (e.g., single nucleotide polymorphisms (SNPs)) involves the de novo assembly of sequence data.

METHODS: We compared the performance of genome assembly tools (Unicycler, RagOut, and RagTag) on sequence data from nine drug resistant M. tuberculosis isolates (multi-drug (MDR) n = 1; pre-extensively-drug (pre-XDR) n = 8) generated using Illumina HiSeq, Oxford Nanopore Technology (ONT) PromethION, and PacBio platforms.

RESULTS: Our investigation found that Unicycler-based assemblies had significantly higher genome completeness (~98.7%; p values = 0.01) compared to other assembler tools (RagOut = 98.6%, and RagTag = 98.6%). The genome assembly sizes (bp) across isolates and sequencers based on RagOut was significantly longer (p values < 0.001) (4,418,574 ± 8,824 bp) than Unicycler and RagTag assemblies (Unicycler = 4,377,642 ± 55,257 bp, and RagTag = 4,380,711 ± 51,164 bp). RagOut-based assemblies had the fewest contigs (~32) and the longest genome size (4,418,574 bp; vs. H37Rv reference size 4,411,532 bp) and therefore were chosen for downstream analysis. Pan-genome analysis of Illumina and PacBio hybrid assemblies revealed the greatest number of detected genes (4,639 genes; H37Rv reference contains 3,976 genes), while Illumina and ONT hybrid assemblies produced the highest number of SNPs. The number of genes from hybrid assemblies with ONT and PacBio long-reads (mean: 4,620 genes) was greater than short-read assembly alone (4,478 genes). All nine RagOut hybrid genome assemblies detected known mutations in genes associated with MDR-TB and pre-XDR-TB.

CONCLUSIONS: Unicycler software performed the best in terms of achieving contiguous genomes, whereas RagOut improved the quality of Unicycler's genome assemblies by providing a longer genome size. Overall, our approach has demonstrated that short-read and long-read hybrid assembly can provide a more complete genome assembly than short-read assembly alone by detecting pan-genomes and more genes, including IS6110, and SNPs.}, } @article {pmid39218842, year = {2024}, author = {Mane, RS and Prasad, BD and Sahni, S and Quaiyum, Z and Sharma, VK}, title = {Biotechnological studies towards improvement of finger millet using multi-omics approaches.}, journal = {Functional \& integrative genomics}, volume = {24}, number = {5}, pages = {148}, pmid = {39218842}, issn = {1438-7948}, mesh = {*Plant Breeding/methods ; *Eleusine/genetics ; *Genomics/methods ; Gene Editing/methods ; Crops, Agricultural/genetics ; Genome, Plant ; Biotechnology ; Multiomics ; }, abstract = {A plethora of studies have uncovered numerous important genes with agricultural significance in staple crops. However, when it comes to orphan crops like minor millet, genomic research lags significantly behind that of major crops. This situation has promoted a focus on exploring research opportunities in minor millets, particularly in finger millet, using cutting-edge methods. Finger millet, a coarse cereal known for its exceptional nutritional content and ability to withstand environmental stresses, represents a promising climate-smart and nutritional crop in the battle against escalating environmental challenges. The existing traditional improvement programs for finger millet are insufficient to address global hunger effectively. 
The lack of utilization of high-throughput platforms, genome editing, haplotype breeding, and advanced breeding approaches hinders the systematic multi-omics studies on finger millet, which are essential for pinpointing crucial genes related to agronomically important and various stress responses. The growing environmental uncertainties have widened the gap between the anticipated and real progress in crop improvement. To overcome these challenges a combination of cutting-edge multi-omics techniques such as high-throughput sequencing, speed breeding, mutational breeding, haplotype-based breeding, genomic selection, high-throughput phenotyping, pangenomics, genome editing, and more along with integration of deep learning and artificial intelligence technologies are essential to accelerate research efforts in finger millet. The scarcity of multi-omics approaches in finger millet leaves breeders with limited modern tools for crop enhancement. Therefore, leveraging datasets from previous studies could prove effective in implementing the necessary multi-omics interventions to enrich the genetic resource in finger millet.}, } @article {pmid39215522, year = {2024}, author = {Gao, J and Xu, Y}, title = {DNA sequences alignment method using sparse index on pan-genome graph.}, journal = {Journal of bioinformatics and computational biology}, volume = {}, number = {}, pages = {2450019}, doi = {10.1142/S0219720024500197}, pmid = {39215522}, issn = {1757-6334}, abstract = {The graph of sequences represents the genetic variations of pan-genome concisely and space-efficiently than multiple linear reference genome. In order to accelerate aligning reads to the graph, an index of graph-based reference genomes is used to obtain candidate locations. However, the potential combinatorial explosion of nodes on the sequence graph leads to increasing the index space and maximum memory usage of alignment process considerably, especially for large-scale datasets. 
For this, existing methods typically attempt to prune complex regions, or extend the length of seeds, which sacrifices the recall of alignment algorithm despite reducing space usage slightly. We present the Sparse-index of Graph (SIG) and alignment algorithm SIG-Aligner, capable of indexing and aligning at the lower memory cost. SIG builds the non-overlapping minimizers index inside nodes of sequence graph and SIG-Aligner filters out most of the false positive matches by the method based on the pigeonhole principle. Compared to Giraffe, the results of computational experiments show that SIG achieves a significant reduction in index memory space ranging from 50% to 75% for the human pan-genome graphs, while still preserving superior or comparable accuracy of alignment and the faster alignment time.}, } @article {pmid39213169, year = {2024}, author = {Andrews, KR and Besser, TE and Stalder, T and Top, EM and Baker, KN and Fagnan, MW and New, DD and Schneider, GM and Gal, A and Andrews-Dickert, R and Hunter, SS and Beckmen, KB and Christensen, L and Justice-Allen, A and Konetchy, D and Lehman, CP and Manlove, K and Miyasaki, H and Nordeen, T and Roug, A and Cassirer, EF}, title = {Comparative genomic analysis identifies potential adaptive variation in Mycoplasma ovipneumoniae.}, journal = {Microbial genomics}, volume = {10}, number = {8}, pages = {}, doi = {10.1099/mgen.0.001279}, pmid = {39213169}, issn = {2057-5858}, mesh = {Animals ; *Mycoplasma ovipneumoniae/genetics ; *Goats/microbiology ; *Phylogeny ; Sheep/microbiology ; *Genome, Bacterial ; Genomics ; Reindeer/microbiology ; China ; Sheep Diseases/microbiology ; Adaptation, Physiological/genetics ; Australia ; Pneumonia, Mycoplasma/microbiology/veterinary ; }, abstract = {Mycoplasma ovipneumoniae is associated with respiratory disease in wild and domestic Caprinae globally, with wide variation in disease outcomes within and between host species. 
To gain insight into phylogenetic structure and mechanisms of pathogenicity for this bacterial species, we compared M. ovipneumoniae genomes for 99 samples from 6 countries (Australia, Bosnia and Herzegovina, Brazil, China, France and USA) and 4 host species (domestic sheep, domestic goats, bighorn sheep and caribou). Core genome sequences of M. ovipneumoniae assemblies from domestic sheep and goats fell into two well-supported phylogenetic clades that are divergent enough to be considered different bacterial species, consistent with each of these two clades having an evolutionary origin in separate host species. Genome assemblies from bighorn sheep and caribou also fell within these two clades, indicating multiple spillover events, most commonly from domestic sheep. Pangenome analysis indicated a high percentage (91.4 %) of accessory genes (i.e. genes found only in a subset of assemblies) compared to core genes (i.e. genes found in all assemblies), potentially indicating a propensity for this pathogen to adapt to within-host conditions. In addition, many genes related to carbon metabolism, which is a virulence factor for Mycoplasmas, showed evidence for homologous recombination, a potential signature of adaptation. The presence or absence of annotated genes was very similar between sheep and goat clades, with only two annotated genes significantly clade-associated. However, three M. ovipneumoniae genome assemblies from asymptomatic caribou in Alaska formed a highly divergent subclade within the sheep clade that lacked 23 annotated genes compared to other assemblies, and many of these genes had functions related to carbon metabolism. Overall, our results suggest that adaptation of M. ovipneumoniae has involved evolution of carbon metabolism pathways and virulence mechanisms related to those pathways. 
The genes involved in these pathways, along with other genes identified as potentially involved in virulence in this study, are potential targets for future investigation into a possible genomic basis for the high variation observed in disease outcomes within and between wild and domestic host species.}, } @article {pmid39212644, year = {2024}, author = {Askenasy, I and Swain, JEV and Ho, PM and Nazeer, RR and Welch, A and Bényei, ÉB and Mancini, L and Nir, S and Liao, P and Welch, M}, title = {'Wild Type'.}, journal = {Microbiology (Reading, England)}, volume = {170}, number = {8}, pages = {}, doi = {10.1099/mic.0.001495}, pmid = {39212644}, issn = {1465-2080}, mesh = {*Genome, Bacterial ; Bacteria/genetics/classification/isolation & purification ; Evolution, Molecular ; Genetic Variation ; Genomics ; }, abstract = {In this opinion piece, we consider the meaning of the term 'wild type' in the context of microbiology. This is especially pertinent in the post-genomic era, where we have a greater awareness of species diversity than ever before. Genomic heterogeneity, in vitro evolution/selection pressures, definition of 'the wild', the size and importance of the pan-genome, gene-gene interactions (epistasis), and the nature of the 'wild-type gene' are all discussed. 
We conclude that wild type is an outdated and even misleading phrase that should be gradually phased out.}, } @article {pmid39212029, year = {2024}, author = {de Block, T and De Baetselier, I and Van den Bossche, D and Abdellati, S and Gestels, Z and Laumen, JGE and Van Dijck, C and Vanbaelen, T and Claes, N and Vandelannoote, K and Kenyon, C and Harrison, O and Santhini Manoharan-Basil, S}, title = {Genomic oropharyngeal Neisseria surveillance detects MALDI-TOF MS species misidentifications and reveals a novel Neisseria cinerea clade.}, journal = {Journal of medical microbiology}, volume = {73}, number = {8}, pages = {}, doi = {10.1099/jmm.0.001871}, pmid = {39212029}, issn = {1473-5644}, mesh = {*Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization/methods ; *Oropharynx/microbiology ; Humans ; *Whole Genome Sequencing ; *Multilocus Sequence Typing/methods ; *Genome, Bacterial ; Neisseria cinerea/genetics ; Phylogeny ; Neisseria/classification/genetics/isolation & purification ; Belgium ; Neisseria meningitidis/genetics/classification/isolation & purification ; Neisseriaceae Infections/microbiology/diagnosis ; }, abstract = {Introduction. Commensal Neisseria spp. are highly prevalent in the oropharynx as part of the healthy microbiome. N. meningitidis can colonise the oropharynx too from where it can cause invasive meningococcal disease. To identify N. meningitidis, clinical microbiology laboratories often rely on Matrix Assisted Laser Desorption/Ionisation Time of Flight Mass Spectrometry (MALDI-TOF MS). Hypothesis/Gap statement. N. meningitidis may be misidentified by MALDI-TOF MS. Aim. To conduct genomic surveillance of oropharyngeal Neisseria spp. in order to: (i) verify MALDI-TOF MS species identification, and (ii) characterize commensal Neisseria spp. genomes. Methodology. We analysed whole genome sequence (WGS) data from 119 Neisseria spp. isolates from a surveillance programme for oropharyngeal Neisseria spp. in Belgium. 
Different species identification methods were compared: (i) MALDI-TOF MS, (ii) Ribosomal Multilocus Sequence Typing (rMLST) and (iii) rplF gene species identification. WGS data were used to further characterize Neisseria species found with supplementary analyses of Neisseria cinerea genomes. Results. Based on genomic species identification, isolates from the oropharyngeal Neisseria surveillance study were composed of the following species: N. meningitidis (n=23), N. subflava (n=61), N. mucosa (n=15), N. oralis (n=8), N. cinerea (n=5), N. elongata (n=3), N. lactamica (n=2), N. bacilliformis (n=1) and N. polysaccharea (n=1). Of these 119 isolates, four isolates identified as N. meningitidis (n=3) and N. subflava (n=1) by MALDI-TOF MS, were determined to be N. polysaccharea (n=1), N. cinerea (n=2) and N. mucosa (n=1) by rMLST. Phylogenetic analyses revealed that N. cinerea isolates from the general population (n=3, cluster one) were distinct from those obtained from men who have sex with men (MSM, n=2, cluster two). The latter contained genomes misidentified as N. meningitidis using MALDI-TOF MS. These two N. cinerea clusters persisted after the inclusion of published N. cinerea WGS (n=42). Both N. cinerea clusters were further defined through pangenome and Average Nucleotide Identity (ANI) analyses. Conclusion. 
This study provides insights into the importance of genomic genus-wide Neisseria surveillance studies to improve the characterization and identification of the Neisseria genus.}, } @article {pmid39211246, year = {2024}, author = {Hughes Lago, C and Blackburn, D and Kinder Pavlicek, M and Threadgill, DS}, title = {Comparative Genomic Analysis of Campylobacter rectus and Closely Related Species.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.07.26.605372}, pmid = {39211246}, issn = {2692-8205}, abstract = {Campylobacter rectus is a gram-negative, anaerobic bacterium strongly associated with periodontitis. It also causes various extraoral infections and is linked to adverse pregnancy outcomes in humans and murine models. C. rectus and related oral Campylobacters have been termed "emerging Campylobacter species" because infections by these organisms are likely underreported. Previously, no comparative methods have been used to analyze more than single C. rectus strains and until recently, very few C. rectus genomes have been publicly available. More sequenced genomes and comparative analyses are needed to study the genomic features and pathogenicity of this species. We sequenced eight new C. rectus strains and used comparative methods to identify regions of interest. An emphasis was put on the type III flagellar secretion system (T3SS), type IV secretion system (T4SS), and type VI secretion system (T6SS) because these protein complexes are important for pathogenesis in other Campylobacter species. RAST, BV-BRC, and other bioinformatics tools were used to assemble, annotate, and compare these regions in the genomes. The pan-genome of C. rectus consists of 2670 genes with core and accessory genomes of 1429 and 1241 genes, respectively. All isolates analyzed in this study have T3SS and T6SS hallmark proteins, while five of the isolates are missing a T4SS system. 
Twenty-one prophage clusters were identified across the panel of isolates, including four that appear intact. Overall, significant genomic islands were found, suggesting regions in the genomes that underwent horizontal gene transfer. Additionally, the high frequency of CRISPR arrays and other repetitive elements has led to genome rearrangements across the strains, including in areas adjacent to secretion system gene clusters. This study describes the substantial diversity present among C. rectus isolates and highlights tools/assays that have been developed to permit functional genomic studies. Additionally, we have expanded the studies on C. showae T4SS since we have two new C. showae genomes to report. We also demonstrate that unlike C. rectus, C. showae does not demonstrate evidence of intact T6SS except for the strain CAM. The only strain of sequenced C. massilensis has neither T4SS nor T6SS.}, } @article {pmid39203545, year = {2024}, author = {Gheorghe-Barbu, I and Dragomir, RI and Gradisteanu Pircalabioru, G and Surleac, M and Dinu, IA and Gaboreanu, MD and Czobor Barbu, I}, title = {Tracing Acinetobacter baumannii's Journey from Hospitals to Aquatic Ecosystems.}, journal = {Microorganisms}, volume = {12}, number = {8}, pages = {}, doi = {10.3390/microorganisms12081703}, pmid = {39203545}, issn = {2076-2607}, support = {ERANET 243/2021//ERANET/ ; PN-III-P4-PCE-2021-1797 (PCE 96/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; PN-III-P1-1.1-TE-2021-1515 (TE112/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; PN-III-P1-1.1-PD-2021-0540 (PD102/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; }, abstract = {BACKGROUND: This study provides a comprehensive analysis of Acinetobacter baumannii in aquatic environments and fish microbiota by integrating culture-dependent methods, 16S metagenomics, 
and antibiotic resistance profiling.

METHODS: A total of 83 A. baumannii isolates were recovered using culture-dependent methods from intra-hospital infections (IHI) and wastewater (WW) and surface water (SW) samples from two southern Romanian cities in August 2022. The antibiotic susceptibility was screened using disc diffusion, microdilution, PCR, and Whole Genome Sequencing assays.

RESULTS: The highest microbial load in the analyzed samples was found in Glina, Bucharest, for both WW and SW samples across all investigated phenotypes. For Bucharest isolates, the resistance levels corresponded to fluoroquinolones > aminoglycosides > β-lactam antibiotics. In contrast, A. baumannii from upstream SW samples in Târgoviște showed the highest resistance to aminoglycosides. The blaOXA-23 gene was frequently detected in IHI, WW, and SW isolates in Bucharest, but was absent in Târgoviște. Molecular phylogeny revealed the presence of ST10 in Târgoviște isolates and ST2 in Bucharest isolates, while other minor STs were not specifically correlated with a sampling point. Using 16S rRNA sequencing, significant differences in microbial populations between the two locations were identified. The low abundance of Alphaproteobacteria and Actinobacteria in both locations suggests environmental pressures or contamination events.

CONCLUSIONS: These findings indicate significant fecal contamination and potential public health risks, emphasizing the need for improved water quality monitoring and management.}, } @article {pmid39203478, year = {2024}, author = {Zhang, L and Kulyar, MF and Niu, T and Yang, S and Chen, W}, title = {Comparative Genomics of Limosilactobacillus reuteri YLR001 Reveals Genetic Diversity and Probiotic Properties.}, journal = {Microorganisms}, volume = {12}, number = {8}, pages = {}, doi = {10.3390/microorganisms12081636}, pmid = {39203478}, issn = {2076-2607}, support = {32202873//National Natural Science Foundation of China/ ; 22JR5RA885//Youth Science and Technology Fund Project of Gansu Province/ ; GAU-KYQD-2021-09//Scientific Research Start-up Funds for Openly recruited Doctors of Gansu Agricultural University/ ; }, abstract = {To gain deeper insights into the genomic characteristics of Limosilactobacillus reuteri (L. reuteri) YLR001 and uncover its probiotic properties, in the current study, a comprehensive analysis of its whole genome was conducted, explicitly exploring the genetic variations associated with different host organisms. The genome of YLR001 consisted of a circular 2,242,943 bp chromosome with a GC content of 38.84%, along with three circular plasmids (24,864, 38,926, and 132,625 bp). Among the 2183 protein-coding sequences (CDSs), the specific genes associated with genetic adaptation and stress resistance were identified. We predicted the function of COG protein genes and analyzed the KEGG pathways. Comparative genome analysis revealed that the pan-genome contained 5207 gene families, including 475 core gene families and 941 strain-specific genes. Phylogenetic analysis revealed distinct host specificity among 20 strains of L. reuteri, highlighting substantial genetic diversity across different hosts. This study enhanced our comprehension of the genetic diversity of L. 
reuteri YLR001, demonstrated its potential probiotic characteristics, and established more solid groundwork for future applications.}, } @article {pmid39201777, year = {2024}, author = {Gureeva, MV and Muntyan, MS and Ravin, NV and Grabovich, MY}, title = {Wastewater Treatment with Bacterial Representatives of the Thiothrix Morphotype.}, journal = {International journal of molecular sciences}, volume = {25}, number = {16}, pages = {}, doi = {10.3390/ijms25169093}, pmid = {39201777}, issn = {1422-0067}, support = {20-14-00137//Russian Science Foundation/ ; }, mesh = {*Wastewater/microbiology ; *Thiothrix/metabolism/genetics ; Water Purification/methods ; Sewage/microbiology ; Sulfides/metabolism ; Waste Disposal, Fluid/methods ; }, abstract = {Bacteria of the Thiothrix morphotype, comprising the genera Thiothrix, Thiolinea and Thiofilum, are frequently encountered in domestic and industrial wastewater treatment systems, but they are usually not clearly differentiated due to the marked similarity in their morphologies. Methods ranging from light microscopy, FISH and PCR to modern high-throughput sequencing are used to identify them. The development of these bacteria in wastewater treatment systems has both advantages and disadvantages. On the one hand, the explosive growth of these bacteria can lead to activated sludge bulking or clogging of the treatment system's membranes, with a consequent decrease in the water treatment efficiency. On the other hand, members of the Thiothrix morphotype can improve the quality of granular sludge and increase the water treatment efficiency. This may be due to their capacity for sulfide oxidation, denitrification combined with the oxidation of reduced sulfur compounds, enhanced biological phosphate removal and possibly denitrifying phosphate removal. The recently obtained pangenome of the genus Thiothrix allows the explanation, at the genomic level, of the experimental results of various studies. 
Moreover, this review summarizes the data on the factors affecting the proliferation of representatives of the Thiothrix morphotype.}, } @article {pmid39201547, year = {2024}, author = {Heo, S and Jung, EJ and Park, MK and Sung, MH and Jeong, DW}, title = {Evolution and Competitive Struggles of Lactiplantibacillus plantarum under Different Oxygen Contents.}, journal = {International journal of molecular sciences}, volume = {25}, number = {16}, pages = {}, doi = {10.3390/ijms25168861}, pmid = {39201547}, issn = {1422-0067}, support = {RS-2022-IP322014//Korea Institute of Planning and Evaluation for Technology in Food, Agriculture and Forestry (IPET)/ ; }, mesh = {*Oxygen/metabolism ; *Evolution, Molecular ; Phylogeny ; Lactobacillus plantarum/genetics/metabolism ; Genome, Bacterial ; Anaerobiosis ; Animals ; Humans ; }, abstract = {Lactiplantibacillus (Lb.) plantarum is known as a benign bacterium found in various habitats, including the intestines of animals and fermented foods. Since animal intestines lack oxygen, while fermented foods provide a limited or more oxygen environment, this study aimed to investigate whether there were genetic differences in the growth of Lb. plantarum under aerobic vs. anaerobic conditions. Genomic analysis of Lb. plantarum obtained from five sources-animals, dairy products, fermented meat, fermented vegetables, and humans-was conducted. The analysis included not only an examination of oxygen-utilizing genes but also a comparative pan-genomic analysis to investigate evolutionary relationships between genomes. The ancestral gene analysis of the evolutionary pathway classified Lb. plantarum into groups A and B, with group A further subdivided into A1 and A2. It was confirmed that group A1 does not possess the narGHIJ operon, which is necessary for energy production under limited oxygen conditions. Additionally, it was found that group A1 has experienced more gene acquisition and loss compared to groups A2 and B. 
Despite an initial assumption that there would be genetic distinctions based on the origin (aerobic or anaerobic conditions), it was observed that such differentiation could not be attributed to the origin. However, the evolutionary process indicated that the loss of genes related to nitrate metabolism was essential in anaerobic or limited oxygen conditions, contrary to the initial hypothesis.}, } @article {pmid39200041, year = {2024}, author = {González-Fernández, A and Mencía-Ares, O and García-Iglesias, MJ and Petrocchi-Rilo, M and Miguélez-Pérez, R and Gutiérrez-Martín, CB and Martínez-Martínez, S}, title = {Virulence and Antimicrobial Resistance Characterization of Glaesserella parasuis Isolates Recovered from Spanish Swine Farms.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {13}, number = {8}, pages = {}, doi = {10.3390/antibiotics13080741}, pmid = {39200041}, issn = {2079-6382}, support = {PID2019-105125RB-I00//Ministerio de Ciencia, Innovación y Universidades/ ; }, abstract = {Glaesserella (Haemophilus) parasuis, the causative agent of Glässer's disease, is present in most pig farms as an early colonizer of the upper respiratory tract. It exhibits remarkable variability in virulence and antimicrobial resistance (AMR), with virulent strains capable of inducing respiratory or systemic disease. This study aimed to characterize the virulence and the AMR profiles in 65 G. parasuis isolates recovered from Spanish swine farms. Virulence was assessed using multiplex leader sequence (LS)-PCR targeting vtaA genes, with all isolates identified as clinical (presumed virulent). Pathotyping based on ten pangenome genes revealed the virulent HPS_22970 as the most frequent (83.1%). Diverse pathotype profiles were observed, with 29 unique gene combinations and two isolates carrying only potentially non-virulent pangenome genes. 
AMR phenotyping showed widespread resistance, with 63.3% classified as multidrug resistant, and high resistance to clindamycin (98.3%) and tylosin (93.3%). A very strong association was found between certain pathotype genes and AMR phenotypes, notably between the virulent HPS_22970 and tetracycline resistance (p < 0.001; Φ = 0.58). This study reveals the wide diversity and complexity of G. parasuis pathogenicity and AMR phenotype, emphasizing the need for the targeted characterization of clinical isolates to ensure appropriate antimicrobial treatments and the implementation of prophylactic measures against virulent strains.}, } @article {pmid39200037, year = {2024}, author = {Machado, MAM and Panzenhagen, P and Lázaro, C and Rojas, M and Figueiredo, EES and Conte-Junior, CA}, title = {Unveiling the High Diversity of Clones and Antimicrobial Resistance Genes in Escherichia coli Originating from ST10 across Different Ecological Niches.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {13}, number = {8}, pages = {}, doi = {10.3390/antibiotics13080737}, pmid = {39200037}, issn = {2079-6382}, support = {313119/2020-1, 140016/2021-0, and 310181/2021-6//National Council for Scientific and Technological Development/ ; E-26/200.891/2021, E-26/204.145/2022 and E-26/201.638/2024//Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro/ ; }, abstract = {In this pioneering in silico study in Peru, we aimed to analyze Escherichia coli (E. coli) genomes for antimicrobial resistance genes (ARGs) diversity and virulence and for its mobilome. For this purpose, 469 assemblies from human, domestic, and wild animal hosts were investigated. Of these genomes, three were E. coli strains (pv05, pv06, and sf25) isolated from chickens in our previous study, characterized for antimicrobial susceptibility profile, and sequenced in this study. Three other genomes were included in our repertoire for having rare cgMLSTs. 
The phenotypic analysis for antimicrobial resistance revealed that pv05, pv06, and sf25 strains presented multidrug resistance to antibiotics belonging to at least three classes. Our in silico analysis indicated that many Peruvian genomes included resistance genes, mainly to the aminoglycoside class, ESBL-producing E. coli, sulfonamides, and tetracyclines. In addition, through Multi-locus Sequence Typing, we found more than 180 different STs, with ST10 being the most prevalent among the genomes. Pan-genome mapping revealed that, with new lineages, the repertoire of accessory genes in E. coli increased, especially genes related to resistance and persistence, which may be carried by plasmids. The results also demonstrated several genes related to adhesion, virulence, and pathogenesis, especially genes belonging to the high pathogenicity island (HPI) from Yersinia pestis, with a prevalence of 42.2% among the genomes. The complexity of the genetic profiles of resistance and virulence in our study highlights the adaptability of the pathogen to different environments and hosts. Therefore, our in silico analysis through genome sequencing enables tracking the epidemiology of E. 
coli from Peru and the future development of strategies to mitigate its survival.}, } @article {pmid39196267, year = {2024}, author = {King, AC and Kumar, N and Mellor, KC and Hawkins, PA and McGee, L and Croucher, NJ and Bentley, SD and Lees, JA and Lo, SW}, title = {Comparison of gene-by-gene and genome-wide short nucleotide sequence-based approaches to define the global population structure of Streptococcus pneumoniae.}, journal = {Microbial genomics}, volume = {10}, number = {8}, pages = {}, doi = {10.1099/mgen.0.001278}, pmid = {39196267}, issn = {2057-5858}, mesh = {*Streptococcus pneumoniae/genetics/classification ; *Multilocus Sequence Typing/methods ; *Genome, Bacterial ; *Phylogeny ; Cluster Analysis ; Humans ; Genomics/methods ; }, abstract = {Defining the population structure of a pathogen is a key part of epidemiology, as genomically related isolates are likely to share key clinical features such as antimicrobial resistance profiles and invasiveness. Multiple different methods are currently used to cluster together closely related genomes, potentially leading to inconsistency between studies. Here, we use a global dataset of 26 306 Streptococcus pneumoniae genomes to compare four clustering methods: gene-by-gene seven-locus MLST, core genome MLST (cgMLST)-based hierarchical clustering (HierCC) assignments, life identification number (LIN) barcoding and k-mer-based PopPUNK clustering (known as GPSCs in this species). We compare the clustering results with phylogenetic and pan-genome analyses to assess their relationship with genome diversity and evolution, as we would expect a good clustering method to form a single monophyletic cluster that has high within-cluster similarity of genomic content. We show that the four methods are generally able to accurately reflect the population structure based on these metrics and that the methods were broadly consistent with each other. We investigated further to study the discrepancies in clusters. 
The greatest concordance was seen between LIN barcoding and HierCC (adjusted mutual information score=0.950), which was expected given that both methods utilize cgMLST, but have different methods for defining an individual cluster and different core genome schema. However, the existence of differences between the two methods shows that the selection of a core genome schema can introduce inconsistencies between studies. GPSC and HierCC assignments were also highly concordant (AMI=0.946), showing that k-mer-based methods which use the whole genome and do not require the careful selection of a core genome schema are just as effective at representing the population structure. Additionally, where there were differences in clustering between these methods, this could be explained by differences in the accessory genome that were not identified in cgMLST. We conclude that for S. pneumoniae, standardized and stable nomenclature is important as the number of genomes available expands. Furthermore, the research community should transition away from seven-locus MLST, whilst cgMLST, GPSC and LIN assignments should be used more widely. 
However, to allow for easy comparison between studies and to make previous literature relevant, the reporting of multiple clustering names should be standardized within the research.}, } @article {pmid39194902, year = {2024}, author = {Chen, G and Shi, G and Dai, Y and Zhao, R and Wu, Q}, title = {Graph-Based Pan-Genome Reveals the Pattern of Deleterious Mutations during the Domestication of Saccharomyces cerevisiae.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {10}, number = {8}, pages = {}, doi = {10.3390/jof10080575}, pmid = {39194902}, issn = {2309-608X}, support = {32170015//National Natural Science Foundation of China/ ; }, abstract = {The "cost of domestication" hypothesis suggests that the domestication of wild species increases the number, frequency, and/or proportion of deleterious genetic variants, potentially reducing their fitness in the wild. While extensively studied in domesticated species, this phenomenon remains understudied in fungi. Here, we used Saccharomyces cerevisiae, the world's oldest domesticated fungus, as a model to investigate the genomic characteristics of deleterious variants arising from fungal domestication. Employing a graph-based pan-genome approach, we identified 1,297,761 single nucleotide polymorphisms (SNPs), 278,147 insertion/deletion events (indels; <30 bp), and 19,967 non-redundant structural variants (SVs; ≥30 bp) across 687 S. cerevisiae isolates. Comparing these variants with synonymous SNPs (sSNPs) as neutral controls, we found that the majority of the derived nonsynonymous SNPs (nSNPs), indels, and SVs were deleterious. Heterozygosity was positively correlated with the impact of deleterious SNPs, suggesting a role of genetic diversity in mitigating their effects. The domesticated isolates exhibited a higher additive burden of deleterious SNPs (dSNPs) than the wild isolates, but a lower burden of indels and SVs. Moreover, the domesticated S. 
cerevisiae showed reduced rates of adaptive evolution relative to the wild S. cerevisiae. In summary, deleterious variants tend to be heterozygous, which may mitigate their harmful effects, but they also constrain breeding potential. Addressing deleterious alleles and minimizing the genetic load are crucial considerations for future S. cerevisiae breeding efforts.}, } @article {pmid39192886, year = {2024}, author = {Gagie, T}, title = {How to Find Long Maximal Exact Matches and Ignore Short Ones.}, journal = {Developments in language theory. Conference on Developments in Language Theory}, volume = {14791}, number = {}, pages = {131-140}, doi = {10.1007/978-3-031-66159-4_10}, pmid = {39192886}, abstract = {Finding maximal exact matches (MEMs) between strings is an important task in bioinformatics, but it is becoming increasingly challenging as geneticists switch to pangenomic references. Fortunately, we are usually interested only in the relatively few MEMs that are longer than we would expect by chance. 
In this paper we show that under reasonable assumptions we can find all MEMs of length at least L between a pattern of length m and a text of length n in O(m) time plus extra O(log n) time only for each MEM of length at least nearly L using a compact index for the text, suitable for pangenomics.}, } @article {pmid39192052, year = {2024}, author = {Huang, B and Fan, C and Chen, K and Rao, J and Ou, P and Tian, C and Yang, Y and Cooper, DN and Zhao, H}, title = {VCAT: an integrated variant function annotation tools.}, journal = {Human genetics}, volume = {}, number = {}, pages = {}, pmid = {39192052}, issn = {1432-1203}, support = {2023YFF1204900//National Key Research and Development Program of China/ ; 81971190//Natural Science Foundation of China/ ; 2021A1515010256//Guangdong Key Field Research and Development Plan/ ; 202007030010//Guangzhou Science and Technology Research Plan/ ; }, abstract = {The development of sequencing technology has promoted discovery of variants in the human genome. Identifying functions of these variants is important for us to link genotype to phenotype, and to diagnose diseases. However, it usually requires researchers to visit multiple databases. Here, we presented a one-stop webserver for variant function annotation tools (VCAT, https://biomed.nscc-gz.cn/zhaolab/VCAT/) that is the first one connecting variant to functions via the epigenome, protein, drug and RNA. VCAT is also the first one to make all annotations visualized in interactive charts or molecular structures. VCAT allows users to upload data in VCF format, and download results via a URL. Moreover, VCAT has annotated a huge number (1,262,041,068) of variants collected from dbSNP, 1000 Genomes projects, gnomAD, ICGC, TCGA, and HPRC Pangenome project. For these variants, users are able to search their functions, related diseases and drugs from VCAT. 
In summary, VCAT provides a one-stop webserver to explore the potential functions of human genomic variants including their relationship with diseases and drugs.}, } @article {pmid39191555, year = {2024}, author = {Tiwari, VK and Saripalli, G and Sharma, PK and Poland, J}, title = {Wheat genomics: genomes, pangenomes, and beyond.}, journal = {Trends in genetics : TIG}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.tig.2024.07.004}, pmid = {39191555}, issn = {0168-9525}, abstract = {There is an urgent need to improve wheat for upcoming challenges, including biotic and abiotic stresses. Sustainable wheat improvement requires the introduction of new genes and alleles in high-yielding wheat cultivars. Using new approaches, tools, and technologies to identify and introduce new genes in wheat cultivars is critical. High-quality genomes, transcriptomes, and pangenomes provide essential resources and tools to examine wheat closely to identify and manipulate new and targeted genes and alleles. Wheat genomics has improved excellently in the past 5 years, generating multiple genomes, pangenomes, and transcriptomes. Leveraging these resources allows us to accelerate our crop improvement pipelines. This review summarizes the progress made in wheat genomics and trait discovery in the past 5 years.}, } @article {pmid39191402, year = {2024}, author = {Aoun, N and Georgoulis, SJ and Avalos, JK and Grulla, KJ and Miqueo, K and Tom, C and Lowe-Power, TM}, title = {A pangenomic atlas reveals eco-evolutionary dynamics that shape type VI secretion systems in plant-pathogenic Ralstonia.}, journal = {mBio}, volume = {}, number = {}, pages = {e0032324}, doi = {10.1128/mbio.00323-24}, pmid = {39191402}, issn = {2150-7511}, abstract = {Soilborne Ralstonia solanacearum species complex (RSSC) pathogens disrupt microbial communities as they invade roots and fatally wilt plants. RSSC pathogens secrete antimicrobial toxins using a type VI secretion system (T6SS). 
To investigate how evolution and ecology have shaped the T6SS of these bacterial pathogens, we analyzed the T6SS gene content and architecture across the RSSC and their evolutionary relatives. Our analysis reveals that two ecologically similar Burkholderiaceae taxa, xylem-pathogenic RSSC and Paracidovorax, have convergently evolved to wield large arsenals of T6SS toxins. To understand the mechanisms underlying genomic enrichment of T6SS toxins, we compiled an atlas of 1,066 auxiliary T6SS toxin clusters ("aux" clusters) across 99 high-quality RSSC genomes. We classified 25 types of aux clusters with toxins that predominantly target lipids, nucleic acids, or unknown cellular substrates. The aux clusters were located in diverse genetic neighborhoods and had complex phylogenetic distributions, suggesting frequent horizontal gene flow. Phages and other mobile genetic elements account for most of the aux cluster acquisition on the chromosome but very little on the megaplasmid. Nevertheless, RSSC genomes were more enriched in aux clusters on the megaplasmid. Although the single, ancestral T6SS was broadly conserved in the RSSC, the T6SS has been convergently lost in atypical, non-soilborne lineages. Overall, our data suggest dynamic interplay between the lifestyle of RSSC lineages and the evolution of T6SSes with robust arsenals of toxins. This pangenomic atlas poises the RSSC as an emerging, tractable model to understand the role of the T6SS in shaping pathogen populations. IMPORTANCE: We explored the eco-evolutionary dynamics that shape the inter-microbial warfare mechanisms of a globally significant plant pathogen, the Ralstonia solanacearum species complex. We discovered that most Ralstonia wilt pathogens have evolved extensive and diverse repertoires of type VI secretion system-associated antimicrobial toxins. 
These expansive toxin arsenals potentially enhance the ability of Ralstonia pathogens to invade plant microbiomes, enabling them to rapidly colonize and kill their host plants. We devised a classification system to categorize the Ralstonia toxins. Interestingly, many of the toxin gene clusters are encoded on mobile genetic elements, including prophages, which may be mutualistic symbionts that enhance the inter-microbial competitiveness of Ralstonia wilt pathogens. Moreover, our findings suggest that the convergent loss of this multi-gene trait contributes to genome reduction in two vector-transmitted lineages of Ralstonia pathogens. Our findings demonstrate that the interplay between microbial ecology and pathogen lifestyle shapes the evolution of a genetically complex antimicrobial weapon.}, } @article {pmid39189818, year = {2024}, author = {Olawoye, IB and Waglechner, N and McIntosh, F and Akochy, PM and Cloutier, N and Grandjean Lapierre, S and Tannir, B and Greenaway, C and Matouk, E and Poirier, L and Levesque, RC and Boyle, B and Quach, C and Soualhine, H and Batt, J and Behr, MA and Lee, RS and Guthrie, JL}, title = {Genomic Epidemiology of Mycobacterium abscessus on the Island of Montréal Not Suggestive of Healthcare-associated Person-to-Person Transmission.}, journal = {The Journal of infectious diseases}, volume = {}, number = {}, pages = {}, doi = {10.1093/infdis/jiae407}, pmid = {39189818}, issn = {1537-6613}, abstract = {BACKGROUND: Mycobacterium abscessus complex (MABC), an opportunistic nontuberculous mycobacteria (NTM), can lead to poor clinical outcomes in pulmonary infections. Conflicting data exist on person-to-person transmission of MABC within and across healthcare facilities. To investigate further, a comprehensive retrospective study across five healthcare institutions on the Island of Montréal was undertaken.

METHODS: We analyzed the genomes of 221 MABC isolates obtained from 115 individuals (2010-2018) to identify possible links. Genetic similarity, defined as ≤25 single-nucleotide polymorphisms (SNPs), was investigated through a blinded epidemiological inquiry.

RESULTS: Bioinformatics analyses identified 28 sequence types (STs), including globally observed dominant circulating clones (DCCs). Further analysis revealed 210 isolate pairs within the SNP threshold. Among these pairs, there was one possible lab contamination where isolates from different patients processed in the same lab differed by only 2 SNPs. There were 37 isolate pairs from patients who had provided specimens from the same hospital; however, epidemiological analysis found no evidence of healthcare-associated person-to-person transmission between these patients. Additionally, pan-genome analysis showed higher discriminatory power than core genome analysis for examining genomic similarity.

CONCLUSIONS: Genomics alone is insufficient to establish MABC transmission, particularly considering the genetic similarity and wide distribution of DCCs, although pan-genome analysis has the potential to add further insight. Our findings indicate that MABC infections in Montréal are unlikely attributable to healthcare-associated person-to-person transmission.}, } @article {pmid39185728, year = {2024}, author = {Švara, A and Sun, H and Fei, Z and Khan, A}, title = {Advancing apple genetics research: Malus coronaria and Malus ioensis genomes and a gene family-based pangenome of native North American apples.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {}, number = {}, pages = {}, doi = {10.1093/dnares/dsae026}, pmid = {39185728}, issn = {1756-1663}, abstract = {Wild Malus species flourished in North America long before Europeans introduced domesticated apples. Malus coronaria and M. ioensis are native to the mid-western and eastern USA, while M. angustifolia and M. fusca grow in the southeast and west, respectively. They offer disease resistance, climate and soil adaptability, and horticultural traits for apple breeding. However, their utilization remains limited due to insufficient genomic resources and specific genetics. We report high-quality phased chromosome-scale assemblies of M. coronaria and M. ioensis, generated using long-read and conformation capture sequencing. Phylogenetic and synteny analysis indicated high relatedness between these two genomes and previously-published genome of M. angustifolia, and lower relatedness with M. fusca. Gene family-based pangenome of North American Malus identified 60,211 orthogroups containing 340,087 genes. 
Genes involved in basic cellular and metabolic processes, growth, and development were core to the existence of these species, whereas genes involved in secondary metabolism, stress response, and interactions with other organisms were accessory and are likely associated with adaptation to specific environments. Structural variation hotspots were mostly overlapping with high gene density. This study offers novel native North American Malus genome resources that can be used to identify genes for apple breeding and understand their evolution and adaptation.}, } @article {pmid39182659, year = {2024}, author = {Arjun, OK and Sethi, M and Parida, D and Dash, J and Kumar Das, S and Prakash, T and Senapati, S}, title = {Comprehensive physiological and genomic characterization of a potential probiotic strain, Lactiplantibacillus plantarum ILSF15, isolated from the gut of tribes of Odisha, India.}, journal = {Gene}, volume = {}, number = {}, pages = {148882}, doi = {10.1016/j.gene.2024.148882}, pmid = {39182659}, issn = {1879-0038}, abstract = {Characterizing probiotic features of organisms isolated from diverse environments can lead to the discovery of novel strains with promising functional features and health attributes. The present study attempts to characterize a novel probiotic strain isolated from the gut of the tribal population of Odisha, India. Based on 16S rRNA-based phylogeny, the strain was identified as a species of the Lactiplantibacillus genus and was named Lactiplantibacillus plantarum strain ILSF15. The current investigation focuses on elucidating this strain's genetic and physiological properties associated with probiotic attributes such as biosafety risk, host adaptation/survival traits, and beneficial functional features. 
The novel strain was observed, in vitro, exhibiting features such as acid/bile tolerance, adhesion to the host enteric epithelial cells, cholesterol assimilation, and pathogen exclusion, indicating its ability to survive the harsh environment of the human GIT and resist the growth of harmful microorganisms. Additionally, the L. plantarum ILSF15 strain was found to harbor genes associated with the metabolism and synthesis of various bioactive molecules, including amino acids, carbohydrates, lipids, and vitamins, highlighting the organism's ability to efficiently utilize diverse resources and contribute to the host's nutrition and health. Several genes involved in host adaptation/survival strategies and host-microbe interactions were also identified from the ILSF15 genome. Moreover, L. plantarum strains, in general, were found to have an open pangenome characterized by high genetic diversity and the absence of specific lineages associated with particular habitats, signifying its versatile nature and potential applications in probiotic and functional food industries.}, } @article {pmid39181885, year = {2024}, author = {He, H and Leng, Y and Cao, X and Zhu, Y and Li, X and Yuan, Q and Zhang, B and He, W and Wei, H and Liu, X and Xu, Q and Guo, M and Zhang, H and Yang, L and Lv, Y and Wang, X and Shi, C and Zhang, Z and Chen, W and Zhang, B and Wang, T and Yu, X and Qian, H and Zhang, Q and Dai, X and Liu, C and Cui, Y and Wang, Y and Zheng, X and Xiong, G and Zhou, Y and Qian, Q and Shang, L}, title = {The pan-tandem repeat map highlights multiallelic variants underlying gene expression and agronomic traits in rice.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {7291}, pmid = {39181885}, issn = {2041-1723}, support = {Y2023QC36//Youth Innovation Promotion Association of the Chinese Academy of Sciences (Youth Innovation Promotion Association CAS)/ ; }, mesh = {*Oryza/genetics/growth & development/metabolism ; *Quantitative Trait Loci ; 
*Alleles ; *Gene Expression Regulation, Plant ; *Genome, Plant ; *Tandem Repeat Sequences/genetics ; Chromosome Mapping ; Polymorphism, Single Nucleotide ; Phenotype ; Genetic Variation ; }, abstract = {Tandem repeats (TRs) are genomic regions that tandemly change in repeat number, which are often multiallelic. Their characteristics and contributions to gene expression and quantitative traits in rice are largely unknown. Here, we survey rice TR variations based on 231 genome assemblies and the rice pan-genome graph. We identify 227,391 multiallelic TR loci, including 54,416 TR variations that are absent from the Nipponbare reference genome. Only 1/3 TR variations show strong linkage with nearby bi-allelic variants (SNPs, Indels and PAVs). Using 193 panicle and 202 leaf transcriptomic data, we reveal 485 and 511 TRs act as QTLs independently of other bi-allelic variations to nearby gene expression, respectively. Using plant height and grain width as examples, we identify and validate TRs contributions to rice agronomic trait variations. 
These findings would enhance our understanding of the functions of multiallelic variants and facilitate rice molecular breeding.}, } @article {pmid39179660, year = {2024}, author = {Wang, S and Sun, S and Wang, Q and Chen, H and Guo, Y and Cai, M and Yin, Y and Ma, S and Wang, H}, title = {PathoTracker: an online analytical metagenomic platform for Klebsiella pneumoniae feature identification and outbreak alerting.}, journal = {Communications biology}, volume = {7}, number = {1}, pages = {1038}, pmid = {39179660}, issn = {2399-3642}, support = {32141001//National Natural Science Foundation of China (National Science Foundation of China)/ ; 81991533//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, mesh = {*Klebsiella pneumoniae/genetics/isolation & purification ; *Metagenomics/methods ; Humans ; *Disease Outbreaks ; *Klebsiella Infections/microbiology/epidemiology/diagnosis ; *Phylogeny ; China/epidemiology ; Nanopore Sequencing/methods ; Databases, Genetic ; Genome, Bacterial ; }, abstract = {Clinical metagenomics (CMg) Nanopore sequencing can facilitate infectious disease diagnosis. In China, sub-lineages ST11-KL64 and ST11-KL47 Carbapenem-resistant Klebsiella pneumoniae (CRKP) are widely prevalent. We propose PathoTracker, a specially compiled database and arranged method for strain feature identification in CMg samples and CRKP traceability. A database targeting high-prevalence horizontal gene transfer in CRKP strains and a ST11-only database for distinguishing two sub-lineages in China were created. To make the database user-friendly, facilitate immediate downstream strain feature identification from raw Nanopore metagenomic data, and avoid the need for phylogenetic analysis from scratch, we developed data analysis methods. 
The methods included pre-performed phylogenetic analysis, gene-isolate-cluster index and multilevel pan-genome database and reduced storage space by 10-fold and random-access memory by 52-fold compared with normal methods. PathoTracker can provide accurate and fast strain-level analysis for CMg data after 1 h Nanopore sequencing, allowing early warning of outbreaks. A user-friendly page (http://PathoTracker.pku.edu.cn/) was developed to facilitate online analysis, including strain-level feature, species identifications and phylogenetic analyses. PathoTracker proposed in this study will aid in the downstream analysis of CMg.}, } @article {pmid39174505, year = {2024}, author = {Fang, Y and Xiao, X and Lin, J and Lin, Q and Wang, J and Liu, K and Li, Z and Xing, J and Liu, Z and Wang, B and Qi, Y and Long, X and Zeng, X and Hu, Y and Qi, J and Qin, Y and Yang, J and Zhang, Y and Zhang, S and Ye, D and Zhang, J and Liu, J and Tang, C}, title = {Pan-genome and phylogenomic analyses highlight Hevea species delineation and rubber trait evolution.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {7232}, pmid = {39174505}, issn = {2041-1723}, mesh = {*Hevea/genetics ; *Genome, Plant ; *Phylogeny ; *Rubber/metabolism ; Plant Breeding ; Genetic Variation ; Evolution, Molecular ; Multigene Family ; }, abstract = {The para rubber tree (Hevea brasiliensis) is the world's sole commercial source of natural rubber, a vital industrial raw material. However, the narrow genetic diversity of this crop poses challenges for rubber breeding. Here, we generate high-quality de novo genome assemblies for three H. brasiliensis cultivars, two H. brasiliensis wild accessions, and three other Hevea species (H. nitida, H. pauciflora, and H. benthamiana). Through analyzing genomes of 94 Hevea accessions, we identify five distinct lineages that do not align with their previous species delineations. 
We discover multiple accessions with hybrid origins between these lineages, indicating incomplete reproductive isolation between them. Only two out of four wild lineages have been introduced to commercial rubber cultivars. Furthermore, we reveal that the rubber production traits emerged following the development of a large REF/SRPP gene cluster and its functional specialization in rubber-producing laticifers within this genus. These findings would enhance rubber breeding and benefit research communities.}, } @article {pmid39170454, year = {2024}, author = {Mangal, V and Verma, LK and Singh, SK and Saxena, K and Roy, A and Karn, A and Rohit, R and Kashyap, S and Bhatt, A and Sood, S}, title = {Triumphs of genomic-assisted breeding in crop improvement.}, journal = {Heliyon}, volume = {10}, number = {15}, pages = {e35513}, pmid = {39170454}, issn = {2405-8440}, abstract = {Conventional breeding approaches have played a significant role in meeting the food demand remarkably well until now. However, the increasing population, yield plateaus in certain crops, and limited recombination necessitate using genomic resources for genomics-assisted crop improvement programs. As a result of advancements in the next-generation sequence technology, GABs have developed dramatically to characterize allelic variants and facilitate their rapid and efficient incorporation in crop improvement programs. Genomics-assisted breeding (GAB) has played an important role in harnessing the potential of modern genomic tools, exploiting allelic variation from genetic resources and developing cultivars over the past decade. The availability of pangenomes for major crops has been a significant development, albeit with varying degrees of completeness. Even though adopting these technologies is essentially determined on economic grounds and cost-effective assays, which create a wealth of information that can be successfully used to exploit the latent potential of crops. 
GAB has been instrumental in harnessing the potential of modern genomic resources and exploiting allelic variation for genetic enhancement and cultivar development. GAB strategies will be indispensable for designing future crops and are expected to play a crucial role in breeding climate-smart crop cultivars with higher nutritional value.}, } @article {pmid39166875, year = {2024}, author = {Chan, DTC and Bernstein, HC}, title = {Pangenomic landscapes shape performances of a synthetic genetic circuit across Stutzerimonas species.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0084924}, doi = {10.1128/msystems.00849-24}, pmid = {39166875}, issn = {2379-5077}, abstract = {Engineering identical genetic circuits into different species typically results in large differences in performance due to the unique cellular environmental context of each host, a phenomenon known as the "chassis-effect" or "context-dependency". A better understanding of how genomic and physiological contexts underpin the chassis-effect will improve biodesign strategies across diverse microorganisms. Here, we combined a pangenomic-based gene expression analysis with quantitative measurements of performance from an engineered genetic inverter device to uncover how genome structure and function relate to the observed chassis-effect across six closely related Stutzerimonas hosts. Our results reveal that genome architecture underpins divergent responses between our chosen non-model bacterial hosts to the engineered device. Specifically, differential expression of the core genome, gene clusters shared between all hosts, was found to be the main source of significant concordance to the observed differential genetic device performance, whereas specialty genes from respective accessory genomes were not significant. 
A data-driven investigation revealed that genes involved in denitrification and components of trans-membrane transporter proteins were among the most differentially expressed gene clusters between hosts in response to the genetic device. Our results show that the chassis-effect can be traced along differences among the most conserved genome-encoded functions and that these differences create a unique biodesign space among closely related species. IMPORTANCE: Contemporary synthetic biology endeavors often default to a handful of model organisms to host their engineered systems. Model organisms such as Escherichia coli serve as attractive hosts due to their tractability but do not necessarily provide the ideal environment to optimize performance. As more novel microbes are domesticated for use as biotechnology platforms, synthetic biologists are urged to explore the chassis-design space to optimize their systems and deliver on the promises of synthetic biology. The consequences of the chassis-effect will therefore only become more relevant as the field of biodesign grows. In our work, we demonstrate that the performance of a genetic device is highly dependent on the host environment it operates within, promoting the notion that the chassis can be considered a design variable to tune circuit function. Importantly, our results unveil that the chassis-effect can be traced along similarities in genome architecture, specifically the shared core genome. 
Our study advocates for the exploration of the chassis-design space and is a step forward to empowering synthetic biologists with knowledge for more efficient exploration of the chassis-design space to enable the next generation of broad-host-range synthetic biology.}, } @article {pmid39166872, year = {2024}, author = {Wang, L and Cheng, X and Guo, Y and Cao, J and Sun, M and Hwang, J-S and Liu, R and Fang, J}, title = {Novel isolates of hydrogen-oxidizing chemolithoautotrophic Sulfurospirillum provide insight to the functions and adaptation mechanisms of Campylobacteria in shallow-water hydrothermal vents.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0014824}, doi = {10.1128/msystems.00148-24}, pmid = {39166872}, issn = {2379-5077}, abstract = {Enhancing the availability of representative isolates from hydrothermal vents (HTVs) is imperative for comprehending the microbial processes that propel the vent ecosystem. In recent years, Campylobacteria have emerged as the predominant and ubiquitous taxon across both shallow and deep-sea vent systems. Nevertheless, only a few isolates have been cultured, primarily originating from deep-sea HTVs. Presently, no cultivable isolates of Campylobacteria are accessible in shallow water vent systems (<200 m), which exhibit markedly distinct environmental conditions from their deep-sea counterparts. In this study, we enriched a novel isolate (genus Sulfurospirillum, Campylobacteria) from shallow-water HTVs of Kueishan Island. Genomic and physiological analysis revealed that this novel Campylobacteria species grows on a variety of substrate and carbon/energy sources. 
The pan-genome and phenotypic comparisons with 12 previously isolated Sulfurospirillum species from different environments supported the identification of functional features in Sulfurospirillum genomes crucial for adaptation to vent environments, such as sulfur oxidation, carbon fixation, biofilm formation, and benzoate/toluene degradation, as well as diverse genes related with signal transportation. To conclude, the metabolic characteristics of this novel Campylobacteria augment our understanding of Campylobacteria spanning from deep-sea to shallow-water vent systems. IMPORTANCE: Campylobacteria emerge as the dominant and ubiquitous taxa within vent systems, playing important roles in the vent ecosystems. However, isolated representatives of Campylobacteria have been mainly from the deep-sea hydrothermal fields, leaving a significant knowledge gap regarding the functions, activities, and adaptation strategies of the vent microorganisms in shallow-water hydrothermal vents (HTVs). This study bridges this gap by providing insights into the phenomics and genomic diversity of genus Sulfurospirillum (order Campylobacterales, class Campylobacteria) based on data derived from a novel isolate obtained from shallow-water HTVs. 
Our mesophilic isolate of Sulfurospirillum not only augments the genus diversity of Campylobacteria pure cultures derived from vent systems but also serves as the inaugural reference isolate for Campylobacteria in shallow-water environments.}, } @article {pmid39165128, year = {2024}, author = {Trost, K and Knopp, MR and Wimmer, JLE and Tria, FDK and Martin, WF}, title = {A universal and constant rate of gene content change traces pangenome flux to LUCA.}, journal = {FEMS microbiology letters}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsle/fnae068}, pmid = {39165128}, issn = {1574-6968}, abstract = {Prokaryotic genomes constantly undergo gene flux via lateral gene transfer, generating a pangenome structure consisting of a conserved core genome surrounded by a more variable accessory genome shell. Over time, flux generates change in genome content. Here we measure and compare the rate of genome flux for 5 655 prokaryotic genomes as a function of amino acid sequence divergence in 36 universally distributed proteins of the informational core (IC). We find a clock of gene content change. The long-term average rate of gene content flux is remarkably constant across all higher prokaryotic taxa sampled, whereby the size of the accessory genome—the proportion of the genome harboring gene content difference for genome pairs—varies across taxa. The proportion of species-level accessory genes per genome varies from 0% (Chlamydia) to 30-33% (Alphaproteobacteria, Gammaproteobacteria, Clostridia). 
A clock-like rate of gene content change across all prokaryotic taxa sampled suggest that pangenome structure is a general feature of prokaryotic genomes and that it has been in existence since the divergence of bacteria and archaea.}, } @article {pmid39162515, year = {2024}, author = {Wang, Z and Hülpüsch, C and Foesel, B and Traidl-Hoffmann, C and Reiger, M and Schloter, M}, title = {Genomic and functional divergence of Staphylococcus aureus strains from atopic dermatitis patients and healthy individuals: insights from global and local scales.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0057124}, doi = {10.1128/spectrum.00571-24}, pmid = {39162515}, issn = {2165-0497}, abstract = {Atopic dermatitis (AD) is the most common chronic inflammatory skin disease worldwide and is characterized by a complex interplay with skin microbiota, with Staphylococcus aureus often abnormally more abundant in AD patients than in healthy individuals (HE). S. aureus harbors diverse strains with varied genetic compositions and functionalities, which exhibit differential connections with the severity of AD. However, the differences in S. aureus strains between AD and HE remain unclear, with most variations seen at a specific geographic level, implying spontaneous adaptations rather than systematic distinctions. This study presents genomic and functional differences between these S. aureus strains from AD and HE on both global and local levels. We observed reduced gene content diversity but increased functional variation in the global AD-associated strains. Two additional AD-dominant clusters emerged, with Cluster 1 enriched in transposases and Cluster 2 showcasing genes linked to adaptability and antibiotic resistance. Particularly, robust evidence illustrates that the lantibiotic operon of S. aureus, involved in the biosynthesis of lantibiotics, was acquired via horizontal gene transfer from environmental bacteria. 
Comparisons of the gene abundance profiles in functional categories also indicate limited zoonotic potential between human and animal isolates. Local analysis mirrored global gene diversity but showed distinct functional variations between AD and HE strains. Overall, this research provides foundational insights into the genomic evolution, adaptability, and antibiotic resistance of S. aureus, with significant implications for clinical microbiology. IMPORTANCE: Our study uncovers significant genomic variations in Staphylococcus aureus strains associated with atopic dermatitis. We observed adaptive evolution tailored to the disease microenvironment, characterized by a smaller pan-genome than strains from healthy skin both on the global and local levels. Key functional categories driving strain diversification include "replication and repair" and "transporters," with transposases being pivotal. Interestingly, the local strains predominantly featured metal-related genes, whereas global ones emphasized antimicrobial resistances, signifying scale-dependent diversification nuances. We also pinpointed horizontal gene transfer events, indicating interactions between human-associated and environmental bacteria. These insights expand our comprehension of S. aureus's genetic adaptation in atopic dermatitis, yielding valuable implications for clinical approaches.}, } @article {pmid39160127, year = {2024}, author = {Song, Y and Long, C and Wang, Y and An, Y and Lu, Y}, title = {Advancements in multi-omics for nutraceutical enhancement and traits improvement in buckwheat.}, journal = {Critical reviews in biotechnology}, volume = {}, number = {}, pages = {1-26}, doi = {10.1080/07388551.2024.2373282}, pmid = {39160127}, issn = {1549-7801}, abstract = {Buckwheat (Fagopyrum spp.) is a typical pseudocereal, valued for its extensive nutraceutical potential as well as its centuries-old cultivation. 
Tartary buckwheat and common buckwheat have been used globally and become well-known nutritious foods due to their high quantities of: proteins, flavonoids, and minerals. Moreover, its increasing demand makes it critical to improve nutraceutical, traits and yield. In this review, bioactive compounds accumulated in buckwheat were comprehensively evaluated according to their chemical structure, properties, and physiological function. Biosynthetic pathways of flavonoids, phenolic acids, and fagopyrin were methodically summarized, with the regulation of flavonoid biosynthesis. Although there are classic synthesis pathways presented in the previous research, the metabolic flow of how these certain compounds are being synthesized in buckwheat still remains uncovered. The functional genes involved in the biosynthesis of flavonols, stress response, and plant development were identified based on multi-omics research. Furthermore, it delves into the applications of multi-omics in improving buckwheat's agronomic traits, including: yield, nutritional content, stress resilience, and bioactive compounds biosynthesis. While pangenomics combined with other omics to mine elite genes, the regulatory network and mechanism of specific agronomic traits and biosynthetic of bioactive components, and developing a more efficient genetic transformation system for genetic engineering require further investigation for the execution of breeding designs aimed at enhancing desirable traits in buckwheat. 
This critical review will provide a comprehensive understanding of multi-omics for nutraceutical enhancement and traits improvement in buckwheat.}, } @article {pmid39158344, year = {2024}, author = {Klingström, T and Zonabend König, E and Zwane, AA}, title = {Beyond the hype: using AI, big data, wearable devices, and the internet of things for high-throughput livestock phenotyping.}, journal = {Briefings in functional genomics}, volume = {}, number = {}, pages = {}, doi = {10.1093/bfgp/elae032}, pmid = {39158344}, issn = {2041-2657}, support = {//Livestock Genetics Flagship of the Livestock CGIAR Research Program/ ; }, abstract = {Phenotyping of animals is a routine task in agriculture which can provide large datasets for the functional annotation of genomes. Using the livestock farming sector to study complex traits enables genetics researchers to fully benefit from the digital transformation of society as economies of scale substantially reduces the cost of phenotyping animals on farms. In the agricultural sector genomics has transitioned towards a model of 'Genomics without the genes' as a large proportion of the genetic variation in animals can be modelled using the infinitesimal model for genomic breeding valuations. Combined with third generation sequencing creating pan-genomes for livestock the digital infrastructure for trait collection and precision farming provides a unique opportunity for high-throughput phenotyping and the study of complex traits in a controlled environment. The emphasis on cost efficient data collection mean that mobile phones and computers have become ubiquitous for cost-efficient large-scale data collection but that the majority of the recorded traits can still be recorded manually with limited training or tools. This is especially valuable in low- and middle income countries and in settings where indigenous breeds are kept at farms preserving more traditional farming methods. 
Digitalization is therefore an important enabler for high-throughput phenotyping for smaller livestock herds with limited technology investments as well as large-scale commercial operations. It is demanding and challenging for individual researchers to keep up with the opportunities created by the rapid advances in digitalization for livestock farming and how it can be used by researchers with or without a specialization in livestock. This review provides an overview of the current status of key enabling technologies for precision livestock farming applicable for the functional annotation of genomes.}, } @article {pmid39158313, year = {2024}, author = {Rana, R and Nayak, PK and Madhavan, VN and Sonti, RV and Patel, HK and Patil, PB}, title = {Comparative genomics-based insights into Xanthomonas indica, a non-pathogenic species of healthy rice microbiome with bioprotection function.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0084824}, doi = {10.1128/aem.00848-24}, pmid = {39158313}, issn = {1098-5336}, abstract = {Xanthomonas species are major pathogens of plants and have been studied extensively. There is increasing recognition of the importance of non-pathogenic species within the same genus. With this came the need to understand the genomic and functional diversity of non-pathogenic Xanthomonas (NPX) at the species and strain level. This study reports isolation and investigation into the genomic diversity and variation in NPX isolates, chiefly Xanthomonas indica, a newly discovered NPX species from rice. The study establishes the relationship of X. indica strains within clade I of Xanthomonads with another NPX species, X. sontii, also associated with rice seeds. Identification of highly diverse strains, open-pan genome, and systematic hyper-variation at the lipopolysaccharide biosynthetic locus when compared to pathogenic Xanthomonas indicates the acquisition of new functions for adaptation. 
Furthermore, comparative genomics studies established the absence of major virulence genes such as type III secretion system and effectors, which are present in the pathogens, and the presence of a known bacterial-killing type IV secretion system (X-T4SS). The diverse non-pathogenic strains of X. indica and X. sontii were found to protect rice from bacterial leaf blight pathogen, X. oryzae pv. oryzae (Xoo). The absence of phenotype of an X-T4SS mutant suggests redundancy in the genetic basis of the mechanisms involved in the bioprotection function, which may include multiple genetic loci, such as putative bacteriocin-encoding gene clusters and involvement of other factors such as nutrient and niche competition apart from induction of innate immunity through shared microbial-associated molecular patterns. The rice-NPX community and its pathogenic counterpart can be a promising model for understanding plant-microbe-microbiome interaction studies. IMPORTANCE: The Xanthomonas group of bacteria is known for its characteristic lifestyle as a phytopathogen. However, the discovery of non-pathogenic Xanthomonas (NPX) species is a major shift in understanding this group of bacteria. Multi-strain, in-depth genomic, evolutionary and functional studies on each of these NPX species are still lacking. This study on diverse non-pathogenic strains provides novel insights into genome diversity, dynamics, and evolutionary trends of NPX species from rice microbiome apart from its relationship with other relatives that form a sub-clade. Interestingly, we also uncovered that NPX species protect rice from pathogenic Xanthomonas species. The plant protection property shows their importance as a part of a healthy plant microbiome. Furthermore, finding an open pan-genome and large-scale variation at lipopolysaccharide biosynthetic locus indicates a significant role of the NPX community in host adaptation. 
The findings and high-quality genomic resources of NPX species and the strains will allow further systematic molecular and host-associated microbial community studies for plant health.}, } @article {pmid39155697, year = {2024}, author = {Mederos, MA and Court, CM and Dipardo, BJ and Pisegna, JR and Dawson, DW and Joe Hines, O and Donahue, TR and Graeber, TG and Girgis, MD and Tomlinson, JS}, title = {Oncogenic pathway signatures predict the risk of progression and recurrence in well-differentiated pancreatic neuroendocrine tumors.}, journal = {Journal of surgical oncology}, volume = {}, number = {}, pages = {}, doi = {10.1002/jso.27830}, pmid = {39155697}, issn = {1096-9098}, support = {P01CA168585//NCI/NIH/ ; R01CA222877//NCI/NIH/ ; R01CA227089//NCI/NIH/ ; P50CA092131//NCI/NIH/ ; //W.M. Keck Foundation/ ; //Hirshberg Foundation for Pancreatic Research/ ; }, abstract = {BACKGROUND: Pancreatic neuroendocrine tumors (pNETs) are genomically diverse tumors. The management of newly diagnosed well-differentiated pNETs is limited by a lack of sensitivity of existing biomarkers for prognostication. Our goal was to investigate the potential utility of genetic markers as a predictor of progression-free survival (PFS) and recurrence-free survival (RFS).

METHODS: Whole-exome sequencing of resected well-differentiated, low and intermediate-grade (G1 and G2) pNETs and normal adjacent tissue from patients who underwent resection from 2005 to 2015 was performed. Genetic alterations were classified using pan-genomic and oncogenic pathway classifications. Additional samples with genetic and clinicopathologic data available were obtained from the publicly available International Cancer Genome Consortium (ICGC) database and included in the analysis. The prognostic relevance of these genomic signatures on PFS and RFS was analyzed.

RESULTS: Thirty-one patients who underwent resection for pNET were identified. Genomic analysis of mutational, copy number, cytogenetic, and complex phenomena revealed similar patterns to prior studies of pNETs with relatively few somatic gene mutations but numerous instances of copy number changes. Analysis of genomic and clinicopathologic outcomes using the combined data from our study as well as the ICGC pNET cohort (n = 124 patients) revealed that the recurrent pattern of whole chromosome loss (RPCL) and metastatic disease were independently associated with disease progression. When evaluating patients with local disease at the time of resection, RPCL and alterations in the TGFβ oncogenic pathway were independently associated with the risk of recurrence.

CONCLUSIONS: Well-differentiated pNETs are genomically diverse tumors. Pathway signatures may be prognostic for predicting disease progression and recurrence.}, } @article {pmid39151939, year = {2024}, author = {Woodhouse, MR and Cannon, EK and Portwood, JL and Gardiner, JM and Hayford, RK and Haley, O and Andorf, CM}, title = {Tools and Resources at the Maize Genetics and Genomics Database (MaizeGDB).}, journal = {Cold Spring Harbor protocols}, volume = {}, number = {}, pages = {}, doi = {10.1101/pdb.over108430}, pmid = {39151939}, issn = {1559-6095}, abstract = {The Maize Genetics and Genomics Database (MaizeGDB) is the community resource for maize researchers, offering a suite of tools, informatics resources, and curated data sets to support maize genetics, genomics, and breeding research. Here, we provide an overview of the key resources available at MaizeGDB, including maize genomes, comparative genomics, and pan-genomics tools. This review aims to familiarize users with the range of options available for maize research and highlights the importance of MaizeGDB as a central hub for the maize research community. By providing a detailed snapshot of the database's capabilities, we hope to enable researchers to make use of MaizeGDB's resources, ultimately assisting them to better study the evolution and diversity of maize.}, } @article {pmid39149335, year = {2024}, author = {Ma, W and Chaisson, MJ}, title = {High-resolution global diversity copy number variation maps and association with ctyper.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.08.11.607269}, pmid = {39149335}, issn = {2692-8205}, abstract = {Genetic analysis of copy number variations (CNVs), especially in complex regions, is challenging due to reference bias and ambiguous alignment of Next-Generation Sequencing (NGS) reads to repetitive DNA. 
Consequently, aggregate copy numbers are typically analyzed, overlooking variation between gene copies. Pangenomes contain diverse sequences of gene copies and enable the study of sequence-resolved CNVs. We developed a method, ctyper, to discover sequence-resolved CNVs in NGS data by leveraging CNV genes from pangenomes. From 118 public assemblies, we constructed a database of 3,351 CNV genes, distinguishing each gene copy as a resolved allele. We used phylogenetic trees to organize alleles into highly similar allele-types that revealed events of linked small variants due to stratification, structural variation, conversion, and duplication. Saturation analysis showed that new samples share an average of 97.8% CNV alleles with the database. The ctyper method traces individual gene copies in NGS data to their nearest alleles in the database and identifies allele-specific copy numbers using multivariate linear regression on k-mer counts and phylogenetic clustering. Applying ctyper to 1000 Genomes Project (1kgp) samples showed Hardy-Weinberg Equilibrium on 99.3% of alleles and a 97.6% F1 score on genotypes based on 641 1kgp trios. Leave-one-out analysis on 39 assemblies matched to 1kgp samples showed that 96.5% of variants in query sequences match the genotyped allele. Genotyping 1kgp data revealed 226 population-specific CNVs, including a conversion on SMN2 to SMN1, potentially impacting Spinal Muscular Atrophy diagnosis in Africans. Our results revealed two models of CNV: recent CNVs due to ongoing duplications and polymorphic CNVs from ancient paralogs missing from the reference. To measure the functional impact of CNVs, after merging allele-types, we conducted genome-wide Quantitative Trait Locus analysis on 451 1kgp samples with Geuvadis rRNA-seqs. Using a linear mixed model, our genotyping enables the inference of relative expression levels of paralogs within a gene family. 
In a global evolutionary context, 150 out of 1,890 paralogs (7.94%) and 546 out of 16,628 orthologs (3.28%) had significantly different expression levels, suggesting divergent expression from original genes. Specific examples include lower expression on the converted SMN and increased expression on translocated AMY2B (GTEx pancreas data). Our method enables large cohort studies on complex CNVs to uncover hidden health impacts and overcome reference bias.}, } @article {pmid39147372, year = {2024}, author = {Jara-Servin, A and Mejia, G and Romero, MF and Peimbert, M and Alcaraz, LD}, title = {Unravelling the genomic and environmental diversity of the ubiquitous Solirubrobacter.}, journal = {Environmental microbiology}, volume = {26}, number = {8}, pages = {e16685}, doi = {10.1111/1462-2920.16685}, pmid = {39147372}, issn = {1462-2920}, support = {CVU 725278//Consejo Nacional de Ciencia y Tecnología/ ; IN206824//Universidad Nacional Autónoma de México, DGAPA-PAPIIT-UNAM/ ; }, mesh = {*Soil Microbiology ; *Phylogeny ; *Genome, Bacterial ; *RNA, Ribosomal, 16S/genetics ; Rhizosphere ; Genomics ; Metagenomics ; Genetic Variation ; }, abstract = {Solirubrobacter, though widespread in soils and rhizospheres, has been relatively unexplored despite its ubiquity. Previously acknowledged as a common soil bacterium, our research explores its phylogenomics, pangenomics, environmental diversity, and interactions within bacterial communities. By analysing seven genomic sequences, we have identified a pangenome consisting of 19,645 protein families, of which 2644 are shared across all studied genomes, forming the core genome. Interestingly, despite the non-motility of reported isolates, we discovered genes for flagellin and a partial flagellum assembly pathway. Examining the 16S ribosomal RNA genes of Solirubrobacter revealed substantial diversity, with 3166 operational taxonomic units identified in Mexican soils. 
Co-occurrence network analysis further demonstrated its significant integration within bacterial communities. Through phylogenomic scrutiny, we conclusively excluded the NCBI's GCA_009993245.1 genome from being classified as a Solirubrobacter. Our research into the metagenomic diversity of Solirubrobacter across various environments confirmed its presence in rhizospheres and certain soils, underscoring its adaptability. The geographical ubiquity of Solirubrobacter in rhizospheres raises intriguing questions regarding its potential interactions with plant hosts and the biotic and abiotic factors influencing its presence in soil. Given its ecological significance and genetic diversity, Solirubrobacter warrants further investigation as a potentially crucial yet underappreciated keystone species.}, } @article {pmid39144212, year = {2024}, author = {Gtari, M and Maaoui, R and Ghodhbane-Gtari, F and Ben Slama, K and Sbissi, I}, title = {MAGs-centric crack: how long will, spore-positive Frankia and most Protofrankia, microsymbionts remain recalcitrant to axenic growth?.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1367490}, pmid = {39144212}, issn = {1664-302X}, abstract = {Nearly 50 years after the ground-breaking isolation of the primary Comptonia peregrina microsymbiont under axenic conditions, efforts to isolate a substantial number of Protofrankia and Frankia strains continue with enduring challenges and complexities. This study aimed to streamline genomic insights through comparative and predictive tools to extract traits crucial for isolating specific Frankia in axenic conditions. Pangenome analysis unveiled significant genetic diversity, suggesting untapped potential for cultivation strategies. Shared metabolic strategies in cellular components, central metabolic pathways, and resource acquisition traits offered promising avenues for cultivation. 
Ecological trait extraction indicated that most uncultured strains exhibit no apparent barriers to axenic growth. Despite ongoing challenges, potential caveats, and errors that could bias predictive analyses, this study provides a nuanced perspective. It highlights potential breakthroughs and guides refined cultivation strategies for these yet-uncultured strains. We advocate for tailored media formulations enriched with simple carbon sources in aerobic environments, with atmospheric nitrogen optionally sufficient to minimize contamination risks. Temperature adjustments should align with strain preferences—28-29°C for Frankia and 32-35°C for Protofrankia—while maintaining an alkaline pH. Given potential extended incubation periods (predicted doubling times ranging from 3.26 to 9.60 days, possibly up to 21.98 days), patience and rigorous contamination monitoring are crucial for optimizing cultivation conditions.}, } @article {pmid39141833, year = {2024}, author = {Fortin, SG and Sun, X and Jayakumar, A and Ward, BB}, title = {Nitrite-oxidizing bacteria adapted to low oxygen conditions dominate nitrite oxidation in marine oxygen minimum zones.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1093/ismejo/wrae160}, pmid = {39141833}, issn = {1751-7370}, abstract = {Nitrite is a central molecule in the nitrogen cycle because nitrite oxidation to nitrate (an aerobic process) retains fixed nitrogen in a system and its reduction to dinitrogen gas (anaerobic) reduces the fixed nitrogen inventory. Despite its acknowledged requirement for oxygen, nitrite oxidation is observed in oxygen-depleted layers of the ocean's oxygen minimum zones (OMZs), challenging the current understanding of OMZ nitrogen cycling. Previous attempts to determine whether nitrite-oxidizing bacteria in the anoxic layer differ from known nitrite oxidizers in the open ocean were limited by cultivation difficulties and sequencing depth. 
Here, we construct 31 draft genomes of nitrite-oxidizing bacteria from global OMZs. The distribution of nitrite oxidation rates, abundance and expression of nitrite oxidoreductase genes, and relative abundance of nitrite-oxidizing bacterial draft genomes from the same samples all show peaks in the core of the oxygen-depleted zone (ODZ) and are all highly correlated in depth profiles within the major ocean oxygen minimum zones. The ODZ nitrite oxidizers are not found in the Tara Oceans global dataset (the most complete oxic ocean dataset), and the major nitrite oxidizers found in the oxygenated ocean do not occur in ODZ waters. A pangenomic analysis shows the ODZ nitrite oxidizers have distinct gene clusters compared to oxic nitrite oxidizers and are microaerophilic. These findings all indicate the existence of nitrite oxidizers whose niche is oxygen-deficient seawater. Thus, specialist nitrite-oxidizing bacteria are responsible for fixed nitrogen retention in marine oxygen minimum zones, with implications for control of the ocean's fixed nitrogen inventory.}, } @article {pmid39141228, year = {2024}, author = {Jouffe, C and Dyar, KA and Uhlenhaut, NH}, title = {Chromatin Immunoprecipitation in Adipose Tissue and Adipocytes: How to Proceed and Optimize the Protocol for Transcription Factor DNA Binding.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2846}, number = {}, pages = {35-45}, pmid = {39141228}, issn = {1940-6029}, mesh = {*Adipocytes/metabolism/cytology ; *Adipose Tissue/metabolism/cytology ; *Chromatin Immunoprecipitation/methods ; *DNA/metabolism/genetics ; *Transcription Factors/metabolism ; Humans ; Animals ; Protein Binding ; Chromatin/metabolism/genetics ; }, abstract = {Chromatin immunoprecipitation (ChIP) coupled to qPCR or sequencing is a crucial experiment to determine direct transcriptional regulation under the control of specific transcriptional factors or co-regulators at loci-specific or pan-genomic levels. Here we provide 
a reliable method for processing ChIP from adipocytes or frozen adipose tissue collection, isolation of nuclei, cross-linking of protein-DNA complexes, chromatin shearing, immunoprecipitation, and DNA purification. We also discuss critical steps for optimizing the experiment to perform a successful ChIP in lipid-rich cells/tissues.}, } @article {pmid39140725, year = {2024}, author = {Lemieux, JE}, title = {Analysis of the Borreliaceae Pangenome Reveals a Distinct Genomic Architecture Conserved Across Phylogenetic Scales.}, journal = {The Journal of infectious diseases}, volume = {230}, number = {Supplement_1}, pages = {S51-S61}, doi = {10.1093/infdis/jiae256}, pmid = {39140725}, issn = {1537-6613}, support = {K99/R00AI148604/NH/NIH HHS/United States ; }, mesh = {*Genome, Bacterial ; *Phylogeny ; Humans ; Borrelia/genetics/classification ; Genomics ; Lyme Disease/microbiology ; }, abstract = {The family Borreliaceae contains arthropod-borne spirochetes that cause two widespread human diseases, Lyme disease and relapsing fever. Lyme disease is a subacute, progressive illness with variable stage and tissue manifestations. Relapsing fever is an acute febrile illness with prominent bacteremia that may recur and disseminate, particularly to the nervous system. Clinical heterogeneity is a hallmark of both diseases. While human clinical manifestations are influenced by a wide variety of factors, including immune status and host genetic susceptibility, there is evidence that Borreliaceae microbial factors influence the clinical manifestations of human disease caused by this family of spirochetes. Despite these associations, the spirochete genes that influence the severity and manifestations of human disease are, for the most part, unknown. Recent work has identified lineage-specific expansions of lipoproteome-rich accessory genome elements in virulent clones of Borrelia burgdorferi. 
Using publicly available genome assemblies, it is shown that all Borreliaceae lineages for which sufficient sequence data are available harbor a similar pattern of strongly structured, lineage-specific expansions in their accessory genomes, particularly among lipoproteins, and that this pattern holds across phylogenetic scales including genera, species, and genotypes. The relationships among pangenome elements suggest that infrequent episodes of marked genomic change followed by clonal expansion in geographically and enzootically structured populations may account for the unique lineage structure of Borreliaceae. This analysis informs future genotype-phenotype studies among Borreliaceae and lays a foundation for studies of individual gene function guided by phylogenetic patterns of conservation, diversification, gain, and/or loss.}, } @article {pmid39138795, year = {2024}, author = {Perrin, C and Coutts, M and Dadone-Montaudié, B}, title = {Subungual melanoma: molecular analysis of 31 cases from early stage to invasive melanoma.}, journal = {Histopathology}, volume = {}, number = {}, pages = {}, doi = {10.1111/his.15297}, pmid = {39138795}, issn = {1365-2559}, abstract = {AIMS: The distinction between the benign subungual melanocytic lesions and an early lesion of subungual melanoma (SUM) remains a diagnostic challenge. We evaluated the routine diagnostic utility of array Comparative Genomic Hybridization (aCGH) to detect whole-genome copy number variations (CNV) as well as targeted next-generation sequencing (NGS) in SUM.

METHODS AND RESULTS: This retrospective study included 20 cases of in situ SUM and 11 cases of invasive SUM. Analysis by aCGH detected common oncogene amplifications in all but one case of invasive SUM (n = 10) and in all cases of in situ SUM with a melanocyte count (MC) >45/mm (n = 4 true positive) and the average number of CNV was 8.5. Thirteen remaining cases of in situ SUM gave false negative results (n = 13), owing to a lack of sufficient melanocytes to analyse (median MC of 35.35; range: 10.16-39.5). Molecular analysis failed in four cases (three in situ SUM and one invasive SUM) due to insufficient amounts of DNA. Across the whole cohort, the sensitivity of aCGH was 52%, but when adjusting the cutoff to MC >45/mm, the sensitivity was 93%. Targeted NGS was less informative than aCGH analyses in our series of SUM.

CONCLUSION: To distinguish malignant from benign lesions, especially in situ SUM versus atypical lentiginous melanocytic proliferations, aCGH analysis should be performed when the MC is above 45 melanocytes per linear millimetre. This pangenomic method can detect oncogene amplifications, as well as a number of CNV >3, which strongly support the diagnosis of malignancy.}, } @article {pmid39137112, year = {2024}, author = {Feng, Y and Yang, Y and Hu, Y and Xiao, Y and Xie, Y and Wei, L and Wen, H and Zhang, L and McNally, A and Zong, Z}, title = {Population genomics uncovers global distribution, antimicrobial resistance, and virulence genes of the opportunistic pathogen Klebsiella aerogenes.}, journal = {Cell reports}, volume = {43}, number = {8}, pages = {114602}, doi = {10.1016/j.celrep.2024.114602}, pmid = {39137112}, issn = {2211-1247}, abstract = {Klebsiella aerogenes is an understudied and clinically important pathogen. We therefore investigate its population structure by genome analysis aligned with metadata. We sequence 130 non-duplicated K. aerogenes clinical isolates and identify two inter-patient transmission events. We then retrieve all publicly available K. aerogenes genomes (n = 1,026, accessed by January 1, 2023) and analyze them with our 130 genomes. We develop a core-genome multi-locus sequence-typing scheme. We find that K. aerogenes is a species complex comprising four phylogroups undergoing evolutionary divergence, likely forming three species. We delineate remarkable clonal diversity and identify three worldwide-distributed carbapenemase-encoding clonal clusters, representing high-risk lineages. We uncover that K. aerogenes has an open genome equipped by a large arsenal of antimicrobial resistance genes. We identify two genetic regions specific for K. aerogenes, encoding a type VI secretion system and flagella/chemotaxis for motility, respectively, both contributing to the virulence. 
These results provide much-needed insights into the population structure and pan-genomes of K. aerogenes.}, } @article {pmid39134411, year = {2024}, author = {Kuronen, J and Horsfield, ST and Pöntinen, AK and Mallawaarachchi, S and Arredondo-Alonso, S and Thorpe, H and Gladstone, RA and Willems, RJL and Bentley, SD and Croucher, NJ and Pensar, J and Lees, JA and Tonkin-Hill, G and Corander, J}, title = {Pangenome-spanning epistasis and coselection analysis via de Bruijn graphs.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278485.123}, pmid = {39134411}, issn = {1549-5469}, abstract = {Studies of bacterial adaptation and evolution are hampered by the difficulty of measuring traits such as virulence, drug resistance, and transmissibility in large populations. In contrast, it is now feasible to obtain high-quality complete assemblies of many bacterial genomes thanks to scalable high-accuracy long-read sequencing technologies. To exploit this opportunity, we introduce a phenotype- and alignment-free method for discovering coselected and epistatically interacting genomic variation from genome assemblies covering both core and accessory parts of genomes. Our approach uses a compact colored de Bruijn graph to approximate the intragenome distances between pairs of loci for a collection of bacterial genomes to account for the impacts of linkage disequilibrium (LD). We demonstrate the versatility of our approach to efficiently identify associations between loci linked with drug resistance and adaptation to the hospital niche in the major human bacterial pathogens Streptococcus pneumoniae and Enterococcus faecalis.}, } @article {pmid39133351, year = {2024}, author = {Yin, J and He, M and Liu, XX and Ren, CB and Liu, HH and Luo, H and Chen, G and Wang, ZF and Debnath, SC and Wang, PM and Chen, HX and Zheng, DQ}, title = {Peteryoungia algae sp. nov. 
isolated from seaweeds of Gouqi Island, China, and its unique genetic features among Peteryoungia strains.}, journal = {Antonie van Leeuwenhoek}, volume = {117}, number = {1}, pages = {112}, pmid = {39133351}, issn = {1572-9699}, support = {LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; SKJC-2024-02-003//Project of Sanya Yazhou Bay Science and Technology City/ ; }, mesh = {*Phylogeny ; China ; *RNA, Ribosomal, 16S/genetics ; *Seaweed/microbiology ; *DNA, Bacterial/genetics ; *Base Composition ; *Fatty Acids/analysis/chemistry ; Bacterial Typing Techniques ; Genome, Bacterial ; Sequence Analysis, DNA ; Islands ; Nucleic Acid Hybridization ; }, abstract = {A Gram-stain-negative, light khaki, strictly aerobic, rod-shaped, motile via multiple flagella, and catalase- and oxidase-positive bacterium, designated as SSM4.3[T], was isolated from the seaweed of Gouqi Island in the East China Sea. 
The novel isolate grows at 0-5.0% NaCl concentrations (w/v) (optimum 1%), pH 5.0-9.0 (optimum pH 7.0), and 15-37 °C (optimum 30 °C). The 16S rRNA gene sequences-based phylogeny indicates that the novel marine isolate belongs to the family Rhizobiaceae and that it shared the greatest sequence similarity (98.9%) with Peteryoungia rhizophila CGMCC 1.15691[T]. This classification was also supported by phylogenetic analysis using core genes. The predominant fatty acids (≥ 10%) of the strain were identified as C18:1 ω7c/C18:1 ω6c. Q-10 was identified as the major isoprenoid quinone, with trace levels of Q-9 present. The major polar lipids were identified as diphosphatidylglycerol, phosphatidylethanolamine and phosphatidylglycerol. The complete genome size of strain SSM4.3[T] is 4.39 Mb with a DNA G+C content of 61.3%. The average nucleotide identity, digital DNA-DNA hybridization, and average amino acid identity values between the genomes of strain SSM4.3[T] and its closely related representatives were 74.80-86.93%, 20.00-32.30%, and 70.30-91.52%, respectively. Phylogenetic analysis, grounded on the core genes, reveals the evolutionary relationship between SSM4.3[T] and other Peteryoungia strains. Pan-genomics analysis of 8 previously classified Peteryoungia species and SSM4.3[T] revealed their unique genetic features and functions. Overall, strain SSM4.3[T] was considered to be a new species of the Peteryoungia genus; the name Peteryoungia algae sp. nov. 
has been proposed, with type strain SSM4.3[T] (= LMG 32561 = MCCC 1K07170).}, } @article {pmid39132840, year = {2024}, author = {Vale, FF and Roberts, RJ and Kobayashi, I and Camargo, MC and Rabkin, CS}, title = {Gene content, phage cycle regulation model and prophage inactivation disclosed by prophage genomics in the Helicobacter pylori Genome Project.}, journal = {Gut microbes}, volume = {16}, number = {1}, pages = {2379440}, doi = {10.1080/19490976.2024.2379440}, pmid = {39132840}, issn = {1949-0984}, mesh = {*Helicobacter pylori/genetics/virology ; *Prophages/genetics/physiology ; *Genome, Bacterial ; Humans ; *Phylogeny ; *Genomics ; Helicobacter Infections/microbiology ; }, abstract = {Prophages can have major clinical implications through their ability to change pathogenic bacterial traits. There is limited understanding of the prophage role in ecological, evolutionary, adaptive processes and pathogenicity of Helicobacter pylori, a widespread bacterium causally associated with gastric cancer. Inferring the exact prophage genomic location and completeness requires complete genomes. The international Helicobacter pylori Genome Project (HpGP) dataset comprises 1011 H. pylori complete clinical genomes enriched with epigenetic data. We thoroughly evaluated the H. pylori prophage genomic content in the HpGP dataset. We investigated population evolutionary dynamics through phylogenetic and pangenome analyses. Additionally, we identified genome rearrangements and assessed the impact of prophage presence on bacterial gene disruption and methylome. We found that 29.5% (298) of the HpGP genomes contain prophages, of which only 32.2% (96) were complete, minimizing the burden of prophage carriage. The prevalence of H. pylori prophage sequences was variable by geography and ancestry, but not by disease status of the human host. Prophage insertion occasionally results in gene disruption that can change the global bacterial epigenome. 
Gene function prediction allowed the development of the first model for lysogenic-lytic cycle regulation in H. pylori. We have disclosed new prophage inactivation mechanisms that appear to occur by genome rearrangement, merger with other mobile elements, and pseudogene accumulation. Our analysis provides a comprehensive framework for H. pylori prophage biology and genomics, offering insights into lysogeny regulation and bacterial adaptation to prophages.}, } @article {pmid39130480, year = {2024}, author = {Hasnat, S and Hoque, MN and Mahbub, MM and Sakif, TI and Shahinuzzaman, ADA and Islam, T}, title = {Pantothenate kinase: A promising therapeutic target against pathogenic Clostridium species.}, journal = {Heliyon}, volume = {10}, number = {14}, pages = {e34544}, pmid = {39130480}, issn = {2405-8440}, abstract = {Current treatment of clostridial infections includes broad-spectrum antibiotics and antitoxins, yet antitoxins are ineffective against all Clostridium species. Moreover, rising antimicrobial resistance (AMR) threatens treatment effectiveness and public health. This study therefore aimed to discover a common drug target for four pathogenic clostridial species, Clostridium botulinum, C. difficile, C. tetani, and C. perfringens through an in-silico core genomic approach. Using four reference genomes of C. botulinum, C. difficile, C. tetani, and C. perfringens, we identified 1484 core genomic proteins (371/genome) and screened them for potential drug targets. Through a subtractive approach, four core proteins were finally identified as drug targets, represented by type III pantothenate kinase (CoaX), and selected for further analyses. Interestingly, the CoaX is involved in the phosphorylation of pantothenate (vitamin B5), which is a critical precursor for coenzyme A (CoA) biosynthesis. Investigation of druggability analysis on the identified drug target reinforces CoaX as a promising novel drug target for the selected Clostridium species. 
During the molecular screening of 1201 compounds, a known agonist drug compound (Vibegron) showed strong inhibitory activity against targeted clostridial CoaX. Additionally, we identified tazobactam, a beta-lactamase inhibitor, as effective against the newly proposed target, CoaX. Therefore, identifying CoaX as a single drug target effective against all four clostridial pathogens presents a valuable opportunity to develop a cost-effective treatment for multispecies clostridial infections.}, } @article {pmid39120932, year = {2024}, author = {Krisna, MA and Jolley, KA and Monteith, W and Boubour, A and Hamers, RL and Brueggemann, AB and Harrison, OB and Maiden, MCJ}, title = {Development and implementation of a core genome multilocus sequence typing scheme for Haemophilus influenzae.}, journal = {Microbial genomics}, volume = {10}, number = {8}, pages = {}, doi = {10.1099/mgen.0.001281}, pmid = {39120932}, issn = {2057-5858}, mesh = {*Haemophilus influenzae/genetics/classification ; *Multilocus Sequence Typing/methods ; *Phylogeny ; *Genome, Bacterial ; Humans ; Haemophilus Infections/microbiology ; Genetic Variation ; }, abstract = {Haemophilus influenzae is part of the human nasopharyngeal microbiota and a pathogen causing invasive disease. The extensive genetic diversity observed in H. influenzae necessitates discriminatory analytical approaches to evaluate its population structure. This study developed a core genome multilocus sequence typing (cgMLST) scheme for H. influenzae using pangenome analysis tools and validated the cgMLST scheme using datasets consisting of complete reference genomes (N = 14) and high-quality draft H. influenzae genomes (N = 2297). The draft genome dataset was divided into a development dataset (N = 921) and a validation dataset (N = 1376). The development dataset was used to identify potential core genes, and the validation dataset was used to refine the final core gene list to ensure the reliability of the proposed cgMLST scheme. 
Functional classifications were made for all the resulting core genes. Phylogenetic analyses were performed using both allelic profiles and nucleotide sequence alignments of the core genome to test congruence, as assessed by Spearman's correlation and ordinary least square linear regression tests. Preliminary analyses using the development dataset identified 1067 core genes, which were refined to 1037 with the validation dataset. More than 70% of core genes were predicted to encode proteins essential for metabolism or genetic information processing. Phylogenetic and statistical analyses indicated that the core genome allelic profile accurately represented phylogenetic relatedness among the isolates (R [2] = 0.945). We used this cgMLST scheme to define a high-resolution population structure for H. influenzae, which enhances the genomic analysis of this clinically relevant human pathogen.}, } @article {pmid39116952, year = {2024}, author = {Khan, MAS and Chaity, SC and Hosen, A and Rahman, SR}, title = {Genomic epidemiology of multidrug-resistant clinical Acinetobacter baumannii in Bangladesh.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105656}, doi = {10.1016/j.meegid.2024.105656}, pmid = {39116952}, issn = {1567-7257}, abstract = {The rising frequency of multidrug-resistant (MDR) Acinetobacter baumannii infections represents a significant public health challenge in Bangladesh. Genomic analysis of bacterial pathogens enhances surveillance and control efforts by providing insights into genetic diversity, antimicrobial resistance (AMR) profiles, and transmission dynamics. In this study, we conducted a comprehensive bioinformatic analysis of 82 whole-genome sequences (WGS) of A. baumannii from Bangladesh to understand their genomic epidemiological characteristics. WGS of the MDR and biofilm-forming A. 
baumannii strain S1C revealed the presence of 28 AMR genes, predicting its pathogenicity and classification within sequence type ST2. Multi-locus sequence typing (MLST) genotyping suggested heterogeneity in the distribution of clinical A. baumannii strains in Bangladesh, with a predominance of ST575. The resistome diversity was evident from the detection of 82 different AMR genes, with antibiotic inactivation being the most prevalent resistance mechanism. All strains were predicted to be multidrug-resistant. The observed virulence genes were associated with immune evasion, biofilm formation, adherence, nutrient acquisition, effector delivery, and other mechanisms. Mobile genetic elements carrying AMR genes were predicted in 68.29% (N = 56) of the genomes. The "open" state of the pan-genome and a high proportion of accessory genes highlighted the genome plasticity and diversity of A. baumannii in Bangladesh. Additionally, phylogenomic analysis indicated clustering of A. baumannii strains into three separate clades according to sequence type. In summary, our findings offer detailed insights into the genomic landscape of A. baumannii in Bangladesh, contributing to our understanding of its epidemiology and pathogenicity and informing strategies to combat this pathogen.}, } @article {pmid39116702, year = {2024}, author = {Singhvi, N and Talwar, C and Nagar, S and Verma, H and Kaur, J and Mahato, NK and Ahmad, N and Mondal, K and Gupta, V and Lal, R}, title = {Insights into the radiation and oxidative stress mechanisms in genus Deinococcus.}, journal = {Computational biology and chemistry}, volume = {112}, number = {}, pages = {108161}, doi = {10.1016/j.compbiolchem.2024.108161}, pmid = {39116702}, issn = {1476-928X}, abstract = {Deinococcus species, noted for their exceptional resistance to DNA-damaging environmental stresses, have piqued scientists' interest for decades. 
This study dives into the complex mechanisms underpinning radiation resistance in the Deinococcus genus. We have examined the genomes of 82 Deinococcus species and classified radiation-resistance proteins manually into five unique curated categories: DNA repair, oxidative stress defense, Ddr and Ppr proteins, regulatory proteins, and miscellaneous resistance components. This classification reveals important information about the various molecular mechanisms used by these extremophiles which have been less explored so far. We also investigated the presence or lack of these proteins in the context of phylogenetic relationships, core, and pan-genomes, which offered light on the evolutionary dynamics of radiation resistance. This comprehensive study provides a deeper understanding of the genetic underpinnings of radiation resistance in the Deinococcus genus, with potential implications for understanding similar mechanisms in other organisms using an interactomics approach. Finally, this study reveals the complexities of radiation resistance mechanisms, providing a comprehensive understanding of the genetic components that allow Deinococcus species to flourish under harsh environments. 
The findings add to our understanding of the larger spectrum of stress adaption techniques in bacteria and may have applications in sectors ranging from biotechnology to environmental research.}, } @article {pmid39116055, year = {2024}, author = {Rojas-Vargas, J and Rebollar, EA and Sanchez-Flores, A and Pardo-López, L}, title = {A comparative genomic study of a hydrocarbon-degrading marine bacterial consortium.}, journal = {PloS one}, volume = {19}, number = {8}, pages = {e0303363}, doi = {10.1371/journal.pone.0303363}, pmid = {39116055}, issn = {1932-6203}, mesh = {*Biodegradation, Environmental ; *Hydrocarbons/metabolism ; *Phylogeny ; *Genome, Bacterial ; *Genomics/methods ; Microbial Consortia/genetics ; Bacteria/genetics/metabolism/classification ; Seawater/microbiology ; }, abstract = {Ocean oil pollution has a large impact on the environment and the health of living organisms. Bioremediation cleaning strategies are promising eco-friendly alternatives for tackling this problem. Previously, we designed and reported a hydrocarbon (HC) degrading microbial consortium of four marine strains belonging to the species Alloalcanivorax xenomutans, Halopseudomonas aestusnigri, Paenarthrobacter sp., and Pseudomonas aeruginosa. However, the knowledge about the metabolic potential of this bacterial consortium for HC bioremediation is not yet well understood. Here, we analyzed the complete genomes of these marine bacterial strains accompanied by a phylogenetic reconstruction along with 138 bacterial strains. Synteny between complete genomes of the same species or genus, revealed high conservation among strains of the same species, covering over 91% of their genomic sequences. Functional predictions highlighted a high abundance of genes related to HC degradation, which may result in functional redundancy within the consortium; however, unique and complete gene clusters linked to aromatic degradation were found in the four genomes, suggesting substrate specialization. 
Pangenome gain and loss analysis of genes involved in HC degradation provided insights into the evolutionary history of these capabilities, shedding light on the acquisition and loss of relevant genes related to alkane and aromatic degradation. Our work, including comparative genomic analyses, identification of secondary metabolites, and prediction of HC-degrading genes, enhances our understanding of the functional diversity and ecological roles of these marine bacteria in crude oil-contaminated marine environments and contributes to the applied knowledge of bioremediation.}, } @article {pmid39114027, year = {2024}, author = {Saikat, TA and Sayem Khan, MA and Islam, MS and Tasnim, Z and Ahmed, S}, title = {Characterization and genome mining of Bacillus subtilis BDSA1 isolated from river water in Bangladesh: A promising bacterium with diverse biotechnological applications.}, journal = {Heliyon}, volume = {10}, number = {14}, pages = {e34369}, pmid = {39114027}, issn = {2405-8440}, abstract = {The metabolic versatility of Bacillus subtilis makes it useful for a wide range of applications in biotechnology, from bioremediation to industrially important metabolite production. Understanding the molecular attributes of the biocontrol characteristics of B. subtilis is necessary for its tailored use in the environment and industry. Therefore, the present study aimed to conduct phenotypic characterization and whole genome analysis of the B. subtilis BDSA1 isolated from polluted river water from Dhaka, Bangladesh to explore its biotechnological potential. The chromium reduction capacity at 100 ppm Cr (VI) showed that B. subtilis BDSA1 reduced 40 % of Cr (VI) within 24hrs at 37 °C. Exposure of this bacterium to 200 ppm cadmium resulted in 43 % adsorption following one week of incubation at 37 °C. Molecular detection of chrA and czcC gene confirmed chromium and cadmium resistance characteristics of BDSA1. The size of the genome of the B. 
subtilis BDSA1 was 4.2 Mb with 43.4 % GC content. Genome annotation detected the presence of numerous genes involved in the degradation of xenobiotics, resistance to abiotic stress, production of lytic enzymes, siderophore formation, and plant growth promotion. The assembled genome also carried chromium, cadmium, copper, and arsenic resistance-related genes, notably cadA, czcD, czrA, arsB etc. Genome mining revealed six biosynthetic gene clusters for bacillaene, bacillibacin, bacilysin, subtilosin, fengycin and surfactin. Importantly, BDSA1 was predicted to be non-pathogenic to humans and had only two acquired antimicrobial resistance genes. The pan-genome analysis showed the openness of the B. subtilis pan-genome. Our findings suggested that B. subtilis BDSA1 might be a promising candidate for diverse biotechnological uses.}, } @article {pmid39113613, year = {2024}, author = {Chen, M and Trotter, VV and Walian, PJ and Chen, Y and Lopez, R and Lui, LM and Nielsen, TN and Malana, RG and Thorgersen, MP and Hendrickson, AJ and Carion, H and Deutschbauer, AM and Petzold, CJ and Smith, HJ and Arkin, AP and Adams, MWW and Fields, MW and Chakraborty, R}, title = {Molecular mechanisms and environmental adaptations of flagellar loss and biofilm growth of Rhodanobacter under environmental stress.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1093/ismejo/wrae151}, pmid = {39113613}, issn = {1751-7370}, abstract = {Biofilms aid bacterial adhesion to surfaces via direct and indirect mechanisms, and formation of biofilms is considered as an important strategy for adaptation and survival in sub-optimal environmental conditions. However, the molecular underpinnings of biofilm formation in subsurface sediment/groundwater ecosystems where microorganisms often experience fluctuations in nutrient input, pH, nitrate or metal concentrations is underexplored. 
We examined biofilm formation under different nutrient, pH, metal, and nitrate regimes of 16 Rhodanobacter strains isolated from subsurface groundwater wells spanning diverse pH (3.5 to 5) and nitrate levels (13.7 to 146 mM). Eight Rhodanobacter strains demonstrated significant biofilm growth under low pH, suggesting adaptation to survive and grow at low pH. Biofilms intensified under aluminum stress, particularly in strains possessing fewer genetic traits associated with biofilm formation warranting further investigation. Through RB-TnSeq, proteomics, use of specific mutants and transmission electron microscopy analysis, we discovered flagellar loss under aluminum stress, indicating a potential relationship between motility, metal tolerance, and biofilm growth. Comparative genomic analyses revealed absence of flagella and chemotaxis genes, and presence of putative Type VI secretion system in the high biofilm-forming strain FW021-MT20. This study identifies genetic determinants associated with biofilm growth in a predominant environmental genus, Rhodanobacter, under metal stress and identifies traits aiding survival and adaptation to contaminated subsurface environments.}, } @article {pmid39113147, year = {2024}, author = {Wang, Z and Wang, M and Du, L}, title = {Public perceptions of international genetic information sharing for biomedical research in China: a case study of the social media debate on the article "A Pangenome Reference of 36 Chinese Populations" published in Nature.}, journal = {Human genomics}, volume = {18}, number = {1}, pages = {86}, pmid = {39113147}, issn = {1479-7364}, support = {MYRG2020-00096-FLL//Universidade de Macau/ ; }, abstract = {BACKGROUND: The international disclosure of Chinese human genetic data continues to be a contentious issue in China, generating public debates in both traditional and social media channels. 
Concerns have intensified after Chinese scientists' research on pangenome data was published in the prestigious journal Nature.

METHODS: This study scrutinized microblogs posted on Weibo, a popular Chinese social media site, in the two months immediately following the publication (June 14, 2023-August 21, 2023). Content analysis was conducted to assess the nature of public responses, justifications for positive or negative attitudes, and the users' overall knowledge of how Chinese human genetic information is regulated and managed in China.

RESULTS: Weibo users displayed contrasting attitudes towards the article's public disclosure of pangenome research data, with 18% positive, 64% negative, and 18% neutral. Positive attitudes came primarily from verified government and media accounts, which praised the publication. In contrast, negative attitudes originated from individual users who were concerned about national security and health risks and often believed that the researchers had betrayed China. The benefits of data sharing highlighted in the commentaries included advancements in disease research and scientific progress. Approximately 16% of the microblogs indicated that Weibo users had misunderstood existing regulations and laws governing data sharing and stewardship.

CONCLUSIONS: Based on the predominantly negative public attitudes toward scientific data sharing established by our study, we recommend enhanced outreach by scientists and scientific institutions to increase the public understanding of developments in genetic research, international data sharing, and associated regulations. Additionally, governmental agencies can alleviate public fears and concerns by being more transparent about their security reviews of international collaborative research involving Chinese human genetic data and its cross-border transfer.}, } @article {pmid39113037, year = {2024}, author = {Kuo, WH and Wright, SJ and Small, LL and Olsen, KM}, title = {De novo genome assembly of white clover (Trifolium repens L.) reveals the role of copy number variation in rapid environmental adaptation.}, journal = {BMC biology}, volume = {22}, number = {1}, pages = {165}, pmid = {39113037}, issn = {1741-7007}, support = {IOS-1557770//Directorate for Biological Sciences/ ; DEB-1601641//Division of Environmental Biology/ ; DGE-1143954//Division of Graduate Education/ ; }, abstract = {BACKGROUND: White clover (Trifolium repens) is a globally important perennial forage legume. This species also serves as an eco-evolutionary model system for studying within-species chemical defense variation; it features a well-studied polymorphism for cyanogenesis (HCN release following tissue damage), with higher frequencies of cyanogenic plants favored in warmer locations worldwide. Using a newly generated haplotype-resolved genome and two other long-read assemblies, we tested the hypothesis that copy number variants (CNVs) at cyanogenesis genes play a role in the ability of white clover to rapidly adapt to local environments. We also examined questions on subgenome evolution in this recently evolved allotetraploid species and on chromosomal rearrangements in the broader IRLC legume clade.

RESULTS: Integration of PacBio HiFi, Omni-C, Illumina, and linkage map data yielded a completely de novo genome assembly for white clover (created without a priori sequence assignment to subgenomes). We find that white clover has undergone extensive transposon diversification since its origin but otherwise shows highly conserved genome organization and composition with its diploid progenitors. Unlike some other clover species, its chromosomal structure is conserved with other IRLC legumes. We further find extensive evidence of CNVs at the major cyanogenesis loci; these contribute to quantitative variation in the cyanogenic phenotype and to local adaptation across wild North American populations.

CONCLUSIONS: This work provides a case study documenting the role of CNVs in local adaptation in a plant species, and it highlights the value of pan-genome data for identifying contributions of structural variants to adaptation in nature.}, } @article {pmid39111568, year = {2024}, author = {Joishy, TK and Bhattacharya, A and Singh, CT and Mukherjee, AK and Khan, MR}, title = {Probiotic and anti-inflammatory properties of Lactiplantibacillus plantarum MKTJ24 isolated from an artisanal fermented fish of North-east India.}, journal = {New biotechnology}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.nbt.2024.07.005}, pmid = {39111568}, issn = {1876-4347}, abstract = {The study aimed to isolate and characterize lactic acid bacteria from various traditional fermented fish products from North East India, including Xindol, Hentak, and Ngari, which hold significant dietary importance for the indigenous tribes. Additionally, the study sought to examine their untargeted metabolomic profiles. A total of 43 strains of Bacillus, Priestia, Staphylococcus, Pediococcus, and Lactiplantibacillus were isolated, characterized by 16S rRNA gene and tested for probiotic properties. Five strains passed pH and bile salt tests with strain dependent antimicrobial activity, which exhibited moderate autoaggregation and hydrophobicity properties. Lactiplantibacillus plantarum MKTJ24 exhibited the highest hydrophobicity (42%), which was further confirmed by adhesion assay in HT-29 cell lines (100%). Lactiplantibacillus plantarum MKTJ24 treatment in LPS-stimulated HT-29 cells up-regulated expression of mucin genes compared to LPS-treated cells. Treatment of RAW 264.7 cells with Lactiplantibacillus plantarum MKTJ24 decreased LPS-induced reactive oxygen species (ROS) and nitric oxide (NO) productions. Further, genome analysis of Lactiplantibacillus plantarum MKTJ24 revealed the presence of several probiotic markers and immunomodulatory genes. 
The genome was found to harbour a plantaricin operon involved in bacteriocin production. A pangenome analysis using all the publicly available L. plantarum genomes specifically isolated from fermented fish products identified 120 unique genes in Lactiplantibacillus plantarum MKTJ24. Metabolomic analysis indicated dominance of ascorbic acids, pentafluoropropionate, cyclopropaneacetic acid, fluorobenzylamine, and furanone in Xindol. This study suggests that Lactiplantibacillus plantarum MKTJ24 has potential probiotic and immunomodulatory properties that could be used in processing traditional fermented fish products on an industrial scale to improve their quality and enhance functional properties.}, } @article {pmid39106791, year = {2024}, author = {Gasparini, K and Figueiredo, YG and Araújo, WL and Peres, LE and Zsögön, A}, title = {De novo domestication in the Solanaceae: advances and challenges.}, journal = {Current opinion in biotechnology}, volume = {89}, number = {}, pages = {103177}, doi = {10.1016/j.copbio.2024.103177}, pmid = {39106791}, issn = {1879-0429}, abstract = {The advent of highly efficient genome editing (GE) tools, coupled with high-throughput genome sequencing, has paved the way for the accelerated domestication of crop wild relatives. New crops could thus be rapidly created that are well adapted to cope with drought, flooding, soil salinity, or insect damage. De novo domestication avoids the complexity of transferring polygenic stress resistance from wild species to crops. Instead, new crops can be created by manipulating major genes in stress-resistant wild species. However, the genetic basis of certain relevant domestication-related traits often involves epistasis and pleiotropy. Furthermore, pan-genome analyses show that structural variation driving gene expression changes has been selected during domestication. 
A growing body of work suggests that the Solanaceae family, which includes crop species such as tomatoes, potatoes, eggplants, peppers, and tobacco, is a suitable model group to dissect these phenomena and operate changes in wild relatives to improve agronomic traits rapidly with GE. We briefly discuss the prospects of this exciting novel field in the interface between fundamental and applied plant biology and its potential impact in the coming years.}, } @article {pmid39107817, year = {2024}, author = {Le, DQ and Nguyen, TA and Nguyen, SH and Nguyen, TT and Nguyen, CH and Phung, HT and Ho, TH and Vo, NS and Nguyen, T and Nguyen, HA and Cao, MD}, title = {Efficient inference of large prokaryotic pangenomes with PanTA.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {209}, pmid = {39107817}, issn = {1474-760X}, support = {VINIF.2019.DA11//VinIF/ ; }, abstract = {Pangenome inference is an indispensable step in bacterial genomics, yet its scalability poses a challenge due to the rapid growth of genomic collections. This paper presents PanTA, a software package designed for constructing pangenomes of large bacterial datasets, showing unprecedented efficiency levels multiple times higher than existing tools. PanTA introduces a novel mechanism to construct the pangenome progressively without rebuilding the accumulated collection from scratch. The progressive mode is shown to consume orders of magnitude less computational resources than existing solutions in managing growing datasets. 
The software is open source and is publicly available at https://github.com/amromics/panta and at 10.6084/m9.figshare.23724705 .}, } @article {pmid39107305, year = {2024}, author = {Cortinovis, G and Vincenzi, L and Anderson, R and Marturano, G and Marsh, JI and Bayer, PE and Rocchetti, L and Frascarelli, G and Lanzavecchia, G and Pieri, A and Benazzo, A and Bellucci, E and Di Vittori, V and Nanni, L and Ferreira Fernández, JJ and Rossato, M and Aguilar, OM and Morrell, PL and Rodriguez, M and Gioia, T and Neumann, K and Alvarez Diaz, JC and Gratias, A and Klopp, C and Bitocchi, E and Geffroy, V and Delledonne, M and Edwards, D and Papa, R}, title = {Adaptive gene loss in the common bean pan-genome during range expansion and domestication.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {6698}, pmid = {39107305}, issn = {2041-1723}, support = {862862//EC | Horizon 2020 Framework Programme (EU Framework Programme for Research and Innovation H2020)/ ; }, mesh = {*Phaseolus/genetics ; *Genome, Plant ; *Domestication ; Adaptation, Physiological/genetics ; Genotype ; Genetic Variation ; Crops, Agricultural/genetics ; Selection, Genetic ; Evolution, Molecular ; Mutation ; Plant Breeding/methods ; }, abstract = {The common bean (Phaseolus vulgaris L.) is a crucial legume crop and an ideal evolutionary model to study adaptive diversity in wild and domesticated populations. Here, we present a common bean pan-genome based on five high-quality genomes and whole-genome reads representing 339 genotypes. It reveals ~234 Mb of additional sequences containing 6,905 protein-coding genes missing from the reference, constituting 49% of all presence/absence variants (PAVs). More non-synonymous mutations are found in PAVs than core genes, probably reflecting the lower effective population size of PAVs and fitness advantages due to the purging effect of gene loss. Our results suggest pan-genome shrinkage occurred during wild range expansion. 
Selection signatures provide evidence that partial or complete gene loss was a key adaptive genetic change in common bean populations with major implications for plant adaptation. The pan-genome is a valuable resource for food legume research and breeding for climate change mitigation and sustainable agriculture.}, } @article {pmid39102038, year = {2024}, author = {Olanrewaju, OS and Molale-Tom, LG and Bezuidenhout, CC}, title = {Genomic diversity, antibiotic resistance, and virulence in South African Enterococcus faecalis and Enterococcus lactis isolates.}, journal = {World journal of microbiology \& biotechnology}, volume = {40}, number = {10}, pages = {289}, pmid = {39102038}, issn = {1573-0972}, mesh = {South Africa ; *Genome, Bacterial ; *Enterococcus faecalis/genetics/drug effects/pathogenicity/isolation & purification ; Virulence/genetics ; *Genetic Variation ; *Anti-Bacterial Agents/pharmacology ; *Virulence Factors/genetics ; Humans ; Drug Resistance, Bacterial/genetics ; Genomic Islands/genetics ; Gram-Positive Bacterial Infections/microbiology ; Enterococcus/genetics/drug effects/pathogenicity/isolation & purification/classification ; Phylogeny ; Gene Transfer, Horizontal ; Genomics ; Microbial Sensitivity Tests ; }, abstract = {This study presents the empirical findings of an in-depth genomic analysis of Enterococcus faecalis and Enterococcus lactis isolates from South Africa. It offers valuable insights into their genetic characteristics and their significant implications for public health. The study uncovers nuanced variations in the gene content of these isolates, despite their similar GC contents, providing a comprehensive view of the evolutionary diversity within the species. Genomic islands are identified, particularly in E. faecalis, emphasizing its propensity for horizontal gene transfer and genetic diversity, especially in terms of antibiotic resistance genes. 
Pangenome analysis reveals the existence of a core genome, accounting for a modest proportion of the total genes, with 2157 core genes, 1164 shell genes, and 4638 cloud genes out of 7959 genes in 52 South African E. faecalis genomes (2 from this study, 49 South African genomes downloaded from NCBI, and the E. faecalis reference genome). Detecting large-scale genomic rearrangements, including chromosomal inversions, underscores the dynamic nature of bacterial genomes and their role in generating genetic diversity. The study uncovers an array of antibiotic resistance genes, with trimethoprim, tetracycline, glycopeptide, and multidrug resistance genes prevalent, raising concerns about the effectiveness of antibiotic treatment. Virulence gene profiling unveils a diverse repertoire of factors contributing to pathogenicity, encompassing adhesion, biofilm formation, stress resistance, and tissue damage. These empirical findings provide indispensable insights into these bacteria's genomic dynamics, antibiotic resistance mechanisms, and virulence potential, underlining the pressing need to address antibiotic resistance and implement robust control measures.}, } @article {pmid39101619, year = {2024}, author = {Kileeg, Z and Wang, P and Mott, GA}, title = {Chromosome-scale assembly and annotation of eight Arabidopsis thaliana ecotypes.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae169}, pmid = {39101619}, issn = {1759-6653}, abstract = {The plant Arabidopsis thaliana is a model system used by researchers through much of plant research. Recent efforts have focused on discovering the genomic variation found in naturally occurring ecotypes isolated from around the world. These ecotypes have come from diverse climates and therefore have faced and adapted to a variety of abiotic and biotic stressors. The sequencing and comparative analysis of these genomes can offer insight into the adaptive strategies of plants. 
While there are a large number of ecotype genome sequences available, the majority were created using short-read technology. Mapping of short-reads containing structural variation to a reference genome bereft of that variation leads to incorrect mapping of those reads, resulting in a loss of genetic information and introduction of false heterozygosity. For this reason, long-read de novo sequencing of genomes is required to resolve structural variation events. In this paper, we sequenced the genomes of eight natural variants of A. thaliana using nanopore sequencing. This resulted in highly contiguous assemblies with >95% of the genome contained within 5 contigs. The sequencing results from this study include 5 ecotypes from relict and African populations, an area of untapped genetic diversity. With this study, we increase the knowledge of diversity we have across A. thaliana ecotypes and contribute to ongoing production of an A. thaliana pan-genome.}, } @article {pmid39095952, year = {2024}, author = {She, H and Liu, Z and Xu, Z and Zhang, H and Wu, J and Cheng, F and Wang, X and Qian, W}, title = {Pan-genome analysis of 13 Spinacia accessions reveals structural variations associated with sex chromosome evolution and domestication traits in spinach.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14433}, pmid = {39095952}, issn = {1467-7652}, support = {CAAS-ASTIP-IVFCAAS//the Chinese Academy of Agricultural Sciences Innovation Project/ ; IVF-BRF2023002//Central Public-interest Scientific Institution Basal Research Fund/ ; Y2023QC07//Central Public-interest Scientific Institution Basal Research Fund/ ; CARS-23-A-17//China Agricultural Research System/ ; }, abstract = {Structural variations (SVs) are major genetic variants that can be involved in the origin, adaptation and domestication of species. However, the identification and characterization of SVs in Spinacia species are rare due to the lack of a pan-genome. 
Here, we report eight chromosome-scale assemblies of cultivated spinach and its two wild species. After integration with five existing assemblies, we constructed a comprehensive Spinacia pan-genome and identified 193 661 pan-SVs, which were genotyped in 452 Spinacia accessions. Our pan-SV-enabled genome-wide association study identified signals associated with sex and clarified the evolutionary direction of spinach. Most sex-linked SVs (86%) were biased to occur on the Y chromosome during the evolution of the sex-linked region, resulting in reduced Y-linked gene expression. The frequency of pan-SVs among Spinacia accessions further illustrated the contribution of these SVs to domestication, such as bolting time and seed dormancy. Furthermore, compared with SNPs, pan-SVs act as efficient variants in genomic selection (GS) because of their ability to capture missing heritability information and higher prediction accuracy. Overall, this study provides a valuable resource for spinach genomics and highlights the potential utility of pan-SV in crop improvement and breeding programmes.}, } @article {pmid39091872, year = {2024}, author = {Lin, MJ and Langmead, B and Safonova, Y}, title = {IGLoo: Profiling the Immunoglobulin Heavy chain locus in Lymphoblastoid Cell Lines with PacBio High-Fidelity Sequencing reads.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.07.20.604421}, pmid = {39091872}, issn = {2692-8205}, abstract = {New high-quality human genome assemblies derived from lymphoblastoid cell lines (LCLs) provide reference genomes and pangenomes for genomics studies. However, the characteristics of LCLs pose technical challenges to profiling immunoglobulin (IG) genes. IG loci in LCLs contain a mixture of germline and somatically recombined haplotypes, making them difficult to genotype or assemble accurately. 
To address these challenges, we introduce IGLoo, a software tool that implements novel methods for analyzing sequence data and genome assemblies derived from LCLs. IGLoo characterizes somatic V(D)J recombination events in the sequence data and identifies the breakpoints and missing IG genes in the LCL-based assemblies. Furthermore, IGLoo implements a novel reassembly framework to improve germline assembly quality by integrating information about somatic events and population structural variations in the IG loci. We applied IGLoo to study the assemblies from the Human Pangenome Reference Consortium, providing new insights into the mechanisms, gene usage, and patterns of V(D)J recombination, causes of assembly fragmentation in the IG heavy chain (IGH) locus, and improved representation of the IGH assemblies.}, } @article {pmid39088040, year = {2024}, author = {Nguyen, AK and Schall, PZ and Kidd, JM}, title = {A map of canine sequence variation relative to a Greenland wolf outgroup.}, journal = {Mammalian genome : official journal of the International Mammalian Genome Society}, volume = {}, number = {}, pages = {}, pmid = {39088040}, issn = {1432-1777}, abstract = {For over 15 years, canine genetics research relied on a reference assembly from a Boxer breed dog named Tasha (i.e., canFam3.1). Recent advances in long-read sequencing and genome assembly have led to the development of numerous high-quality assemblies from diverse canines. These assemblies represent notable improvements in completeness, contiguity, and the representation of gene promoters and gene models. Although genome graph and pan-genome approaches have promise, most genetic analyses in canines rely upon the mapping of Illumina sequencing reads to a single reference. 
The Dog10K consortium, and others, have generated deep catalogs of genetic variation through an alignment of Illumina sequencing reads to a reference genome obtained from a German Shepherd Dog named Mischka (i.e., canFam4, UU_Cfam_GSD_1.0). However, alignment to a breed-derived genome may introduce bias in genotype calling across samples. Since the use of an outgroup reference genome may remove this effect, we have reprocessed 1929 samples analyzed by the Dog10K consortium using a Greenland wolf (mCanLor1.2) as the reference. We efficiently performed remapping and variant calling using a GPU-implementation of common analysis tools. The resulting call set removes the variability in genetic differences seen across samples and breed relationships revealed by principal component analysis are not affected by the choice of reference genome. Using this sequence data, we inferred the history of population sizes and found that village dog populations experienced a 9-13 fold reduction in historic effective population size relative to wolves.}, } @article {pmid39087631, year = {2024}, author = {Cong, J and Zhang, S and Zhang, Q and Yu, X and Huang, J and Wei, X and Huang, X and Qiu, J and Zhou, X}, title = {Conserved features and diversity attributes of chimeric RNAs across accessions in four plants.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14437}, pmid = {39087631}, issn = {1467-7652}, support = {2023ZD04073//National Key Research and Development Program of China/ ; 32370671//National Natural Science Foundation of China/ ; 22ZR1445800//Natural Science Foundation of Shanghai/ ; }, abstract = {As a non-collinear expression form of genetic information, chimeric RNAs increase the complexity of transcriptome in diverse organisms. Although chimeric RNAs have been identified in plants, few common features have been revealed. 
Here, we systemically explored the landscape of chimeric RNAs across multi-accession and multi-tissue using pan-genome and transcriptome data of four plants: rice, maize, soybean, and Arabidopsis. Among the four species, conserved characteristics of breakpoints and parental genes were discovered. In each species, chimeric RNAs displayed a high level of diversity among accessions, and the clustering of accessions using chimeric events was generally concordant with clustering based on genomic variants, implying a general relationship between genetic variations and chimeric RNAs. Through mass spectrometry, we confirmed a fusion protein OsNDC1-OsGID1L2 and observed its subcellular localization, which differed from the original proteins. Phenotypic cues in transgenic rice suggest the potential functions of OsNDC1-OsGID1L2. Moreover, an intriguing chimeric event Os01g0216500-Os01g0216900, generated by a large deletion in basmati rice, also exists in another accession without the deletion, demonstrating its convergence in evolution. 
Our results illuminate the characteristics and hint at the evolutionary implications of plant chimeric RNAs, which serve as a supplement to genetic variations, thus expanding our understanding of genetic diversity.}, } @article {pmid39083619, year = {2024}, author = {Rose, SA and Robicheau, BM and Tolman, J and Fonseca-Batista, D and Rowland, E and Desai, D and Ratten, JM and Kantor, EJH and Comeau, AM and Langille, MGI and Jerlström-Hultqvist, J and Devred, E and Sarthou, G and Bertrand, EM and LaRoche, J}, title = {Nitrogen fixation in the widely distributed marine γ-proteobacterial diazotroph Candidatus Thalassolituus haligoni.}, journal = {Science advances}, volume = {10}, number = {31}, pages = {eadn1476}, doi = {10.1126/sciadv.adn1476}, pmid = {39083619}, issn = {2375-2548}, mesh = {*Nitrogen Fixation ; *Gammaproteobacteria/genetics/metabolism/isolation & purification/enzymology/classification ; *Phylogeny ; Nitrogenase/metabolism/genetics ; Seawater/microbiology ; Metagenome ; Oxidoreductases ; }, abstract = {The high diversity and global distribution of heterotrophic bacterial diazotrophs (HBDs) in the ocean has recently become apparent. However, understanding the role these largely uncultured microorganisms play in marine N2 fixation poses a challenge due to their undefined growth requirements and the complex regulation of the nitrogenase enzyme. We isolated and characterized Candidatus Thalassolituus haligoni, a member of a widely distributed clade of HBD belonging to the Oceanospirillales. Analysis of its nifH gene via amplicon sequencing revealed the extensive distribution of Cand. T. haligoni across the Pacific, Atlantic, and Arctic Oceans. Pangenome analysis indicates that the isolate shares >99% identity with an uncultured metagenome-assembled genome called Arc-Gamma-03, recently recovered from the Arctic Ocean. Through combined genomic, proteomic, and physiological approaches, we confirmed that the isolate fixes N2 gas. 
However, the mechanisms governing nitrogenase regulation in Cand. T. haligoni remain unclear. We propose Cand. T. haligoni as a globally distributed, cultured HBD model species within this understudied clade of Oceanospirillales.}, } @article {pmid39079170, year = {2024}, author = {Kim, JI and Manuele, A and Maguire, F and Zaheer, R and McAllister, TA and Beiko, R}, title = {Identification of key drivers of antimicrobial resistance in Enterococcus using machine learning.}, journal = {Canadian journal of microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1139/cjm-2024-0049}, pmid = {39079170}, issn = {1480-3275}, abstract = {With antimicrobial resistance (AMR) rapidly evolving in pathogens, quick and accurate identification of genetic determinants of phenotypic resistance is essential for improving surveillance, stewardship, and clinical mitigation. Machine learning (ML) models show promise for AMR prediction in diagnostics but require a deep understanding of internal processes to use effectively. Our study utilized AMR gene, pangenomic, and predicted plasmid features from 647 Enterococcus faecium and Enterococcus faecalis genomes across the One Health continuum, along with corresponding resistance phenotypes, to develop interpretive ML classifiers. Vancomycin resistance could be predicted with 99% accuracy with AMR gene features, 98% with pangenome features, and 96% with plasmid clusters. Top pangenome features overlapped with the resistance genes of the vanA operon, which are often laterally transmitted via plasmids. Doxycycline resistance prediction achieved approximately 92% accuracy with pangenome features, with the top feature being elements of Tn916 conjugative transposon, a tet(M) carrier. Erythromycin resistance prediction models achieved about 90% accuracy, but top features were negatively correlated with resistance due to the confounding effect of population structure. 
This work demonstrates the importance of reviewing ML models' features to discern biological relevance even when achieving high-performance metrics. Our workflow offers the potential to propose hypotheses for experimental testing, enhancing the understanding of AMR mechanisms, which are crucial for combating the AMR crisis.}, } @article {pmid39073678, year = {2024}, author = {Gan, S and Ruan, L and Xu, X and Luo, L and Huo, Y and Jiang, J and Zhang, X and Shang, C}, title = {Whole genome sequencing and analysis of Bacillus sp. TTMP2, a tetramethylpyrazine-producing bacterium.}, journal = {Molecular biology reports}, volume = {51}, number = {1}, pages = {863}, pmid = {39073678}, issn = {1573-4978}, support = {AB21220057 and 2021AB27009//Guangxi Key Research and Development Program/ ; }, mesh = {*Bacillus/genetics/metabolism ; *Pyrazines/metabolism ; *Whole Genome Sequencing/methods ; *Genome, Bacterial/genetics ; *Phylogeny ; Metabolic Networks and Pathways/genetics ; Molecular Sequence Annotation ; }, abstract = {BACKGROUND: Tetramethylpyrazine has been extensively studied as an anticancer substance and a flavor substance in the fields of medicine and food industry. A strain with high tetramethylpyrazine production was screened from the fermented grains of Danquan winery. Genome sequencing can reveal the potential roles of bacteria by thoroughly examining the connection between genes and phenotypes from a genomic perspective.

METHODS AND RESULTS: In this study, the whole genome of this strain was sequenced and analyzed. This paper summarized the genomic characteristics of strain TTMP2 and analyzed genes related to the synthesis of tetramethylpyrazine. Bacillus sp. TTMP2 has a complete metabolic pathway for acetoin and tetramethylpyrazine metabolism. Gene function was analyzed by COG annotation, GO annotation, KEGG annotation and functional annotations for lipoproteins, carbohydrate-active enzymes, and pathogen-host interactions. Phylogenetic analysis indicated that Bacillus velezensis had high homology with Bacillus sp. TTMP2. Genomes of 16 Bacillus species cover all genes of Bacillus, suggesting that genus Bacillus has an open pan-genome and can survive in diverse environments.

CONCLUSION: The analysis of genome sequencing data from Bacillus sp. TTMP2 showed that its metabolic characteristics could be deeply understood, indicating that this bacterium had a particular role in tetramethylpyrazine synthesis.}, } @article {pmid39071359, year = {2024}, author = {Kim, J and Varki, R and Oliva, M and Boucher, C}, title = {Re [2] Pair: Increasing the Scalability of RePair by Decreasing Memory Usage.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.07.11.603142}, pmid = {39071359}, issn = {2692-8205}, abstract = {UNLABELLED: The RePair compression algorithm produces a context-free grammar by iteratively substituting the most frequently occurring pair of consecutive symbols with a new symbol until all consecutive pairs of symbols appear only once in the compressed text. It is widely used in the settings of bioinformatics, machine learning, and information retrieval where random access to the original input text is needed. For example, in pangenomics, RePair is used for random access to a population of genomes. BigRePair improves the scalability of the original RePair algorithm by using Prefix-Free Parsing (PFP) to preprocess the text prior to building the RePair grammar. Despite the efficiency of PFP on repetitive text, there is a scalability issue with the size of the parse which causes a memory bottleneck in BigRePair. In this paper, we design and implement recursive RePair (denoted as Re [2] Pair), which builds the RePair grammar using recursive PFP. Our novel algorithm faces the challenge of constructing the RePair grammar without direct access to the parse of text, relying solely on the dictionary of the text and the parse and dictionary of the parse of the text. We compare Re [2] Pair to BigRePair using SARS-CoV-2 haplotypes and haplotypes from the 1000 Genomes Project. 
We show that our method Re [2] Pair achieves over a 40% peak memory reduction and a speedup ranging from 12% to 79% compared to BigRePair when compressing the largest input texts in all experiments. Re [2] Pair is made publicly available under the GNU public license here: https://github.com/jkim210/Recursive-RePair.

Theory of computation → Formal languages and automata theory.}, } @article {pmid39065181, year = {2024}, author = {Romanenko, L and Bystritskaya, E and Savicheva, Y and Eremeev, V and Otstavnykh, N and Kurilenko, V and Velansky, P and Isaeva, M}, title = {Description and Whole-Genome Sequencing of Mariniflexile litorale sp. nov., Isolated from the Shallow Sediments of the Sea of Japan.}, journal = {Microorganisms}, volume = {12}, number = {7}, pages = {}, doi = {10.3390/microorganisms12071413}, pmid = {39065181}, issn = {2076-2607}, support = {15.BRK.21.0004 (Contract No. 075-15-2021-1052)//Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A Gram-negative, aerobic, rod-shaped, non-motile, yellow-pigmented bacterium, KMM 9835[T], was isolated from the sediment sample obtained from the Amur Bay of the Sea of Japan seashore, Russia. Phylogenetic analyses based on the 16S rRNA gene and whole genome sequences positioned the novel strain KMM 9835[T] in the genus Mariniflexile as a separate line sharing the highest 16S rRNA gene sequence similarities of 96.6% and 96.2% with Mariniflexile soesokkakense RSSK-9[T] and Mariniflexile fucanivorans SW5[T], respectively, and similarity values of <96% to other recognized Mariniflexile species. The average nucleotide identity and digital DNA-DNA hybridization values between strain KMM 9835[T] and M. soesokkakense KCTC 32427[T], Mariniflexile gromovii KCTC 12570[T], M. fucanivorans DSM 18792[T], and M. maritimum M5A1M[T] were 83.0%, 82.5%, 83.4%, and 78.3% and 30.7%, 29.6%, 29.5%, and 24.4%, respectively. The genomic DNA GC content of strain KMM 9835[T] was 32.5 mol%. The dominant menaquinone was MK-6, and the major fatty acids were iso-C15:0, iso-C15:1ω10c, and C15:0. The polar lipids of strain KMM 9835[T] consisted of phosphatidylethanolamine, two unidentified aminolipids, an unidentified phospholipid, and six unidentified lipids. 
A pan-genome analysis showed that the KMM 9835[T] genome encoded 753 singletons. The annotated singletons were more often related to transport protein systems (SusC), transcriptional regulators (AraC, LytTR, LacI), and enzymes (glycosylases). The KMM 9835[T] genome was highly enriched in CAZyme-encoding genes, the proportion of which reached 7.3%. Moreover, the KMM 9835[T] genome was characterized by a high abundance of CAZyme gene families (GH43, GH28, PL1, PL10, CE8, and CE12), indicating its potential to catabolize pectin. This may represent part of an adaptation strategy facilitating microbial consumption of plant polymeric substrates in aquatic environments near shorelines and freshwater sources. Based on the combination of phylogenetic and phenotypic characterization, the marine sediment strain KMM 9835[T] (=KCTC 92792[T]) represents a novel species of the genus Mariniflexile, for which the name Mariniflexile litorale sp. nov. is proposed.}, } @article {pmid39065090, year = {2024}, author = {Tatarenkov, A and Muñoz-Gutiérrez, I and Vargas, I and Behnsen, J and Mota-Bravo, L}, title = {Pangenome Analysis Reveals Novel Contact-Dependent Growth Inhibition System and Phenazine Biosynthesis Operons in Proteus mirabilis BL95 That Are Located in An Integrative and Conjugative Element.}, journal = {Microorganisms}, volume = {12}, number = {7}, pages = {}, doi = {10.3390/microorganisms12071321}, pmid = {39065090}, issn = {2076-2607}, support = {GM136498//National Institute of General Medical Sciences (NIGMS), NIH/ ; }, abstract = {Proteus mirabilis is a leading cause of urinary tract infections and a common commensal of the gastrointestinal tract. Our recent study (JB) showed that P. mirabilis strain BL95 employs a novel contact-dependent killing system against enteric bacteria in the mouse gut and in vitro. To uncover the genetic determinants of this system, we performed whole-genome sequencing of BL95 and compared it with 98 complete genomes of P. mirabilis. 
BL95 carries 56 coding sequences (CDSs) not found in other P. mirabilis. Over half of these unique genes are located on a novel integrative conjugative element (ICE) named ICEPm2, inserted in tRNA-Phe and exclusive to BL95. ICEPm2 has integration, conjugation, and DNA replication modules nearly identical to ICEPm1 (common in P. mirabilis), but ICEPm2 of BL95 carries two unique operons for P. mirabilis - a phenazine biosynthesis and a contact-dependent growth inhibition (CDI) system. ICEPm2 is absent in the P. mirabilis (AR_0156) closest to BL95 and it is present in the genomes of several Escherichia coli from mouse intestines, indicating its recent horizontal mobilization. BL95 shares over 100 genes of five different secretion systems with other P. mirabilis, mostly poorly studied, making a large pool of candidate genes for the contact-dependent growth inhibition.}, } @article {pmid39059904, year = {2024}, author = {Peng, J and Xiao, R and Wu, C and Zheng, Z and Deng, Y and Chen, K and Xiang, Y and Xu, C and Zou, L and Liao, M and Zhang, J}, title = {Characterization of the prevalence of Salmonella in different retail chicken supply modes using genome-wide and machine-learning analyses.}, journal = {Food research international (Ottawa, Ont.)}, volume = {191}, number = {}, pages = {114654}, doi = {10.1016/j.foodres.2024.114654}, pmid = {39059904}, issn = {1873-7145}, abstract = {Salmonella is a foodborne pathogen that causes salmonellosis, of which retail chicken meat is a major source. However, the prevalence of Salmonella in different retail chicken supply modes and the threat posed to consumers remains unclear. The prevalence, serotype distribution, antibiotic resistance, and genomic characteristics of Salmonella in three supply modes of retail chicken (live poultry, frozen, and chilled) were investigated using whole-genome sequencing (WGS) and machine learning (ML). 
In this study, 480 retail chicken samples from live poultry, frozen, and chilled supply modes in Guangzhou from 2020 to 2021, as well as 253 Salmonella isolates (total isolation rate = 53.1 %), were collected. The prevalence of isolates in the live poultry mode (67.5 %, 81/120) was statistically higher than in the frozen (50.0 %, 120/240) and chilled (43.3 %, 52/120) (P < 0.05) modes. Serotype identification showed significant differences in the serotype distribution of Salmonella in different supply modes. S. Enteritidis (46.7 %) and S. Indiana (14.2 %) were predominant in the frozen mode. S. Agona (23.5 %) and S. Saintpaul (13.6 %) were predominant in live poultry, while S. Enteritidis (40.4 %) and S. Kentucky (17.3 %) were predominant in chilled mode. Antibiotic testing showed that frozen mode isolates were more resistant; the multidrug-resistant (MDR) rate of isolates in the frozen mode reached 91.8 %, significantly higher than in the chilled (86.5 %) and live (74.1 %) (P < 0.05) modes. WGS was performed on 155 top serotypes (S. Enteritidis, S. Kentucky, S. Indiana, and S. Agona). The antibiotic resistance gene analysis showed that the abundance and carrying rate of antibiotic resistance genes of Salmonella in the frozen mode (54 types, 16.1 %) were significantly higher than in other modes (live poultry: 36 types, 9.4 %, P < 0.05; chilled: 31 types, 11.6 %). The blaNDM-1 and blaNDM-9 genes encoding carbapenem resistance were found in frozen mode isolates on a complex transposon consisting of TnAS3-IS26. Virulence factors and plasmid replicons were abundant in the studied frozen mode isolates. In addition, single nucleotide polymorphism (SNP) phylogenetic tree results showed that in the frozen supply mode, the S. Enteritidis clonal clade continued to contaminate retail chicken meat and was homologous to S. Enteritidis strains found in farm chicken embryos, slaughterhouse chicken carcasses, and patients from hospitals in China (SNP 0 = 10). 
Notably, the pan-genome-based ML model showed that characteristic genes in frozen and live poultry isolates differed. The narZ gene was a key characteristic gene in frozen isolates, encoding nitrate reductase, relating to anaerobic bacterial growth. The ydgJ gene is a key characteristic gene in the live mode and encodes an oxidoreductase related to oxidative function in bacteria. The high prevalence of live poultry mode Salmonella and the transmission of frozen mode MDR Salmonella in this study pose serious risks to food safety and public health, emphasizing the importance of improving disinfection and cold storage measures to reduce Salmonella contamination and transmission. In conclusion, the continued surveillance of Salmonella across different supply models and the development of an epidemiological surveillance system based on WGS is necessary.}, } @article {pmid39059819, year = {2024}, author = {Radford, EJ and Whitworth, DE}, title = {The genetic basis of predation by myxobacteria.}, journal = {Advances in microbial physiology}, volume = {85}, number = {}, pages = {1--55}, doi = {10.1016/bs.ampbs.2024.04.001}, pmid = {39059819}, issn = {2162-5468}, abstract = {Myxobacteria (phylum Myxococcota) are abundant and virtually ubiquitous microbial predators. Facultatively multicellular organisms, they are able to form multicellular fruiting bodies and swarm across surfaces, cooperatively hunting for prey. Myxobacterial communities are able to kill a wide range of prey microbes, assimilating their biomass to fuel population growth. Their mechanism of predation is exobiotic - hydrolytic enzymes and toxic metabolites are secreted into the extracellular environment, killing and digesting prey cells from without. However, recent observations of single-cell predation and contact-dependent prey killing challenge the dogma of myxobacterial predation being obligately cooperative. 
Regardless of their predatory mechanisms, myxobacteria have a broad prey range, which includes Gram-negative bacteria, Gram-positive bacteria and fungi. Pangenome analyses have shown that their extremely large genomes are mainly composed of accessory genes, which are not shared by all members of their species. It seems that the diversity of accessory genes in different strains provides the breadth of activity required to prey upon such a smorgasbord of microbes, and also explains the considerable strain-to-strain variation in predatory efficiency against specific prey. After providing a short introduction to general features of myxobacterial biology which are relevant to predation, this review brings together a rapidly growing body of work into the molecular mechanisms and genetic basis of predation, presenting a summary of current knowledge, highlighting trends in research and suggesting strategies by which we can potentially exploit myxobacterial predation in the future.}, } @article {pmid39059732, year = {2024}, author = {Magome, TG and Ochai, SO and Hassim, A and Bezuidenhout, CC and van Heerden, H and Lekota, KE}, title = {A genome-based investigation of the Priestia species isolated from anthrax endemic regions in Kruger National Park.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105649}, doi = {10.1016/j.meegid.2024.105649}, pmid = {39059732}, issn = {1567-7257}, abstract = {Priestia is a genus that was renamed from the genus Bacillus based on the conserved signature indels (CSIs) in protein sequences that separate Priestia species from Bacillus, with the latter only including closely related species to B. subtilis and B. cereus. Diagnosis of anthrax, a zoonotic disease, is implicated by tripartite anthrax virulence genes (lef, pagA, and cya) and poly-γ-D-glutamic acid capsular genes (capABCDE) of Bacillus anthracis. 
Due to the amplification of anthrax virulence genes in Priestia isolates, the search for homologous anthrax virulence genes within the Priestia spp. (n = 9) isolated from animal blood smears was embarked upon through whole genome sequencing. In silico taxonomic identification of the isolates was conducted using genome taxonomy database (GTDB), average nucleotide identity (ANI), and multi-locus sequence typing (MLST), which identified the genomes as P. aryabhattai (n = 5), P. endophytica (n = 2) and P. megaterium (n = 2). A pan-genome analysis was further employed on the Priestia genomes, including the screening of virulence, antibiotic resistance genes and mobile genetic elements on the sequenced genomes. The oligoribonuclease NrnB protein sequences showed that Priestia spp. possess a unique CSI that is absent in other Bacillus species. Furthermore, the CSI in P. endophytica is unique from other Priestia spp. Pan-genomic analysis indicates that P. endophytica clusters separately from P. aryabhattai and P. megaterium. In silico BLASTn genome analysis using the SYBR primers, Taqman probes and primers that target the chromosomal marker (Ba-1), protective antigen (pagA), and lethal factor (lef) on B. anthracis, showed partial binding to Priestia regions encoding for hypothetical proteins, pyridoxine biosynthesis, hydrolase, and inhibitory proteins. The antibiotic resistance genes (ARG) profile of Priestia spp. showed that the genomes contained no more than two ARGs. This included genes conferring resistance to rifamycin and fosfomycin (P. endophytica) as well as clindamycin (P. aryabhattai and P. megaterium). Priestia genomes lacked B. anthracis plasmids and consisted of plasmid replicon types with unknown functions. Furthermore, the amplification of Priestia strains may result in false positives when qPCR is used to detect the virulence genes of B. 
anthracis in soil, blood smears, and/or environmental samples.}, } @article {pmid39058093, year = {2024}, author = {Peñil-Celis, A and Tagg, KA and Webb, HE and Redondo-Salvo, S and Francois Watkins, L and Vielva, L and Griffin, C and Kim, JY and Folster, JP and Garcillan-Barcia, MP and de la Cruz, F}, title = {Mobile genetic elements define the non-random structure of the Salmonella enterica serovar Typhi pangenome.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0036524}, doi = {10.1128/msystems.00365-24}, pmid = {39058093}, issn = {2379-5077}, abstract = {Bacterial relatedness measured using select chromosomal loci forms the basis of public health genomic surveillance. While approximating vertical evolution through this approach has proven exceptionally valuable for understanding pathogen dynamics, it excludes a fundamental dimension of bacterial evolution-horizontal gene transfer. Incorporating the accessory genome is the logical remediation and has recently shown promise in expanding epidemiological resolution for enteric pathogens. Employing k-mer-based Jaccard index analysis, and a novel genome length distance metric, we computed pangenome (i.e., core and accessory) relatedness for the globally important pathogen Salmonella enterica serotype Typhi (Typhi), and graphically express both vertical (homology-by-descent) and horizontal (homology-by-admixture) evolutionary relationships in a reticulate network of over 2,200 U.S. Typhi genomes. This analysis revealed non-random structure in the Typhi pangenome that is driven predominantly by the gain and loss of mobile genetic elements, confirming and expanding upon known epidemiological patterns, revealing novel plasmid dynamics, and identifying avenues for further genomic epidemiological exploration. 
With an eye to public health application, this work adds important biological context to the rapidly improving ways of analyzing bacterial genetic data and demonstrates the value of the accessory genome to infer pathogen epidemiology and evolution. IMPORTANCE: Given bacterial evolution occurs in both vertical and horizontal dimensions, inclusion of both core and accessory genetic material (i.e., the pangenome) is a logical step toward a more thorough understanding of pathogen dynamics. With an eye to public, and indeed, global health relevance, we couple contemporary tools for genomic analysis with decades of research on mobile genetic elements to demonstrate the value of the pangenome, known and unknown, annotated, and hypothetical, for stratification of Salmonella enterica serovar Typhi (Typhi) populations. We confirm and expand upon what is known about Typhi epidemiology, plasmids, and antimicrobial resistance dynamics, and offer new avenues of exploration to further deduce Typhi ecology and evolution, and ultimately to reduce the incidence of human disease.}, } @article {pmid39058033, year = {2024}, author = {Mortimer, TD}, title = {mSphere of Influence: Predicting the evolution of pathogen populations.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0043224}, doi = {10.1128/msphere.00432-24}, pmid = {39058033}, issn = {2379-5042}, abstract = {Tatum D. Mortimer works in the field of pathogen population genomics and evolution. In this mSphere of Influence article, she reflects on how "Frequency-dependent selection can forecast evolution in Streptococcus pneumoniae" by Azarian et al. and "Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome" by Beavan et al. 
made an impact on her by highlighting the ways in which genomic data can be used to predict pathogen evolution.}, } @article {pmid39057398, year = {2024}, author = {Cui, H and Fan, S and Ding, W and Zhang, W}, title = {Genomic Analysis of Novel Sulfitobacter Bacterial Strains Isolated from Marine Biofilms.}, journal = {Marine drugs}, volume = {22}, number = {7}, pages = {}, doi = {10.3390/md22070289}, pmid = {39057398}, issn = {1660-3397}, mesh = {*Biofilms ; *Phylogeny ; *Genome, Bacterial ; Genomics/methods ; Aquatic Organisms ; Multigene Family ; }, abstract = {Bacteria from the genus Sulfitobacter are distributed across various marine habitats and play a significant role in sulfur cycling. However, the metabolic features of Sulfitobacter inhabiting marine biofilms are still not well understood. Here, complete genomes and paired metatranscriptomes of eight Sulfitobacter strains, isolated from biofilms on subtidal stones, have been analyzed to explore their central energy metabolism and potential of secondary metabolite biosynthesis. Based on average nucleotide identity and phylogenetic analysis, the eight strains were classified into six novel species and two novel strains. The reconstruction of the metabolic pathways indicated that all strains had a complete Entner-Doudoroff pathway, pentose phosphate pathway, and diverse pathways for amino acid metabolism, suggesting the presence of an optimized central carbon metabolism. Pangenome analysis further revealed the differences between the gene cluster distribution patterns among the eight strains, suggesting significant functional variation. Moreover, a total of 47 biosynthetic gene clusters were discovered, which were further classified into 37 gene cluster families that showed low similarity with previously documented clusters. Furthermore, metatranscriptomic analysis revealed the expressions of key functional genes involved in the biosynthesis of ribosomal peptides in in situ marine biofilms. 
Overall, this study sheds new light on the metabolic features, adaptive strategies, and value of genome mining in this group of biofilm-associated Sulfitobacter bacteria.}, } @article {pmid39056703, year = {2024}, author = {Abid, A and Alzahrani, B and Naz, S and Basheer, A and Bakhtiar, SM and Al-Asmari, F and Jamal, SB and Faheem, M}, title = {Reverse Vaccinology Approach to Identify Novel and Immunogenic Targets against Streptococcus gordonii.}, journal = {Biology}, volume = {13}, number = {7}, pages = {}, doi = {10.3390/biology13070510}, pmid = {39056703}, issn = {2079-7737}, abstract = {Streptococcus gordonii is a gram-positive, mutualistic bacterium found in the human body. It is found in the oral cavity, upper respiratory tract, and intestines, and presents a serious clinical problem because it can lead to opportunistic infections in individuals with weakened immune systems. Streptococci are the most prevalent inhabitants of oral microbial communities, and are typical oral commensals found in the human oral cavity. These streptococci, along with many other oral microbes, produce multispecies biofilms that can attach to salivary pellicle components and other oral bacteria via adhesin proteins expressed on the cell surface. Antibiotics are effective against this bacterium, but resistance against antibodies is increasing. Therefore, a more effective treatment is needed. Vaccines offer a promising method for preventing this issue. This study generated a multi-epitope vaccine against Streptococcus gordonii by targeting the completely sequenced proteomes of five strains. The vaccine targets are identified using a pangenome and subtractive proteomic approach. In the present study, 13 complete strains out of 91 strains of S. gordonii are selected. The pangenomics results revealed that out of 2835 pan genes, 1225 are core genes. Out of these 1225 core genes, 643 identified as non-homologous proteins by subtractive proteomics. 
A total of 20 essential proteins are predicted from non-homologous proteins. Among these 20 essential proteins, only five are identified as surface proteins. The vaccine construct is designed based on selected B- and T-cell epitopes of the antigenic proteins with the help of linkers and adjuvants. The designed vaccine is docked against TLR2. The expression of the protein is determined using in silico gene cloning. Findings concluded that Vaccine I with adjuvant shows higher interactions with TLR2, suggesting that the vaccine has the ability to induce a humoral and cell-mediated response to treat and prevent infection; this makes it promising as a vaccine against infectious diseases caused by S. gordonii. Furthermore, validation of the vaccine construct is required by in vitro and in vivo trials to check its actual potency and safety for use to prevent infectious diseases caused by S. gordonii.}, } @article {pmid39054411, year = {2024}, author = {Mol, M and de Maayer, P}, title = {Elucidating the biotechnological potential of the genera Parageobacillus and Saccharococcus through comparative genomic and pan-genome analysis.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {723}, pmid = {39054411}, issn = {1471-2164}, mesh = {*Phylogeny ; *Genome, Bacterial ; *Genomics/methods ; *Bacillaceae/genetics/classification ; Biotechnology ; }, abstract = {BACKGROUND: The genus Geobacillus and its associated taxa have been the focal point of numerous thermophilic biotechnological investigations, both at the whole cell and enzyme level. By contrast, comparatively little research has been done on its recently delineated sister genus, Parageobacillus. Here we performed pan-genomic analyses on a subset of publicly available Parageobacillus and Saccharococcus genomes to elucidate their biotechnological potential.

RESULTS: Phylogenomic analysis delineated the compared taxa into two distinct genera, Parageobacillus and Saccharococcus, with P. caldoxylosilyticus isolates clustering with S. thermophilus in the latter genus. Both genera present open pan-genomes, with the species P. toebii being characterized with the highest novel gene accrual. Diversification of the two genera is driven through the variable presence of plasmids, bacteriophages and transposable elements. Both genera present a range of potentially biotechnologically relevant features, including a source of novel antimicrobials, thermostable enzymes including DNA-active enzymes, carbohydrate active enzymes, proteases, lipases and carboxylesterases. Furthermore, they present a number of metabolic pathways pertinent to degradation of complex hydrocarbons and xenobiotics and for green energy production.

CONCLUSIONS: Comparative genomic analyses of Parageobacillus and Saccharococcus suggest that taxa in both of these genera can serve as a rich source of biotechnologically and industrially relevant secondary metabolites, thermostable enzymes and metabolic pathways that warrant further investigation.}, } @article {pmid39052555, year = {2024}, author = {Chekesa, B and Singh, H and Gonzalez-Juarbe, N and Vashee, S and Wiscovitch-Russo, R and Dupont, CL and Girma, M and Kerro, O and Gumi, B and Ameni, G}, title = {Pangenome and genomic signatures linked to the dominance of the lineage-4 of Mycobacterium tuberculosis isolated from extrapulmonary tuberculosis patients in western Ethiopia.}, journal = {PloS one}, volume = {19}, number = {7}, pages = {e0304060}, doi = {10.1371/journal.pone.0304060}, pmid = {39052555}, issn = {1932-6203}, mesh = {Humans ; Ethiopia/epidemiology ; *Mycobacterium tuberculosis/genetics/isolation & purification ; *Genome, Bacterial ; *Tuberculosis/microbiology/epidemiology/genetics ; Cross-Sectional Studies ; *Genome-Wide Association Study ; Male ; Female ; Adult ; Phylogeny ; Genomics/methods ; Middle Aged ; Young Adult ; Adolescent ; Tuberculosis, Extrapulmonary ; }, abstract = {BACKGROUND: The lineage 4 (L4) of Mycobacterium tuberculosis (MTB) is not only globally prevalent but also locally dominant, surpassing other lineages, with lineage 2 (L2) following in prevalence. Despite its widespread occurrence, factors influencing the expansion of L4 and its sub-lineages remain poorly understood both at local and global levels. Therefore, this study aimed to conduct a pan-genome and identify genomic signatures linked to the elevated prevalence of L4 sublineages among extrapulmonary TB (EPTB) patients in western Ethiopia.

METHODS: A cross-sectional study was conducted at an institutional level involving confirmed cases of extrapulmonary tuberculosis (EPTB) patients from August 5, 2018, to December 30, 2019. A total of 75 MTB genomes, classified under lineage 4 (L4), were used for conducting pan-genome and genome-wide association study (GWAS) analyses. After a quality check, variants were identified using MTBseq, and genomes were de novo assembled using SPAdes. Gene prediction and annotation were performed using Prokka. The pan-genome was constructed using GET_HOMOLOGUES, and its functional analysis was carried out with the Bacterial Pan-Genome Analysis tool (BPGA). For GWAS analysis, Scoary was employed with Benjamini-Hochberg correction, with a significance threshold set at p-value ≤ 0.05.

RESULTS: The analysis revealed a total of 3,270 core genes, predominantly associated with orthologous groups (COG) functions, notably in the categories of '[R] General function prediction only' and '[I] Lipid transport and metabolism'. Conversely, functions related to '[N] Cell motility' and '[Q] Secondary metabolites biosynthesis, transport, and catabolism' were primarily linked to unique and accessory genes. The pan-genome of MTB L4 was found to be open. Furthermore, the GWAS study identified genomic signatures linked to the prevalence of sublineages L4.6.3 and L4.2.2.2.

CONCLUSIONS: Apart from host and environmental factors, the sublineage of L4 employs distinct virulence factors for successful dissemination in western Ethiopia. Given that the functions of these newly identified genes are not well understood, it is advisable to experimentally validate their roles, particularly in the successful transmission of specific L4 sublineages over others.}, } @article {pmid39051872, year = {2024}, author = {Dost, I and Abdel-Glil, M and Persson, S and Conza, KL and Oleastro, M and Alves, F and Maurischat, S and Scholtzek, A and Mazuet, C and Diancourt, L and Tenson, T and Schmoock, G and Neubauer, H and Schwarz, S and Seyboldt, C}, title = {Genomic study of European Clostridioides difficile ribotype 002/sequence type 8.}, journal = {Microbial genomics}, volume = {10}, number = {7}, pages = {}, doi = {10.1099/mgen.0.001270}, pmid = {39051872}, issn = {2057-5858}, mesh = {*Clostridioides difficile/genetics/classification ; Humans ; *Ribotyping ; *Genome, Bacterial ; *Clostridium Infections/microbiology/epidemiology ; Multilocus Sequence Typing ; Phylogeny ; Animals ; Europe ; Denmark ; Whole Genome Sequencing ; Genomics ; Drug Resistance, Bacterial/genetics ; }, abstract = {Clostridioides difficile has significant clinical importance as a leading cause of healthcare-associated infections, with symptoms ranging from mild diarrhoea to severe colitis, and possible life-threatening complications. C. difficile ribotype (RT) 002, mainly associated with MLST sequence type (ST) 8, is one of the most common RTs found in humans. This study aimed at investigating the genetic characteristics of 537 C. difficile genomes of ST8/RT002. To this end, we sequenced 298 C. difficile strains representing a new European genome collection, with strains from Germany, Denmark, France and Portugal. These sequences were analysed against a global dataset consisting of 1,437 ST8 genomes available through Enterobase. 
Our results showed close genetic relatedness among the studied ST8 genomes, a diverse array of antimicrobial resistance (AMR) genes and the presence of multiple mobile elements. Notably, the pangenome analysis revealed an open genomic structure. ST8 shows relatively low overall variation. Thus, clonal isolates were found across different One Health sectors (humans, animals, environment and food), time periods, and geographical locations, suggesting the lineage's stability and a universal environmental source. Importantly, this stability did not hinder the acquisition of AMR genes, emphasizing the adaptability of this bacterium to different selective pressures. Although only 2.4 % (41/1,735) of the studied genomes originated from non-human sources, such as animals, food, or the environment, we identified 9 cross-sectoral core genome multilocus sequence typing (cgMLST) clusters. Our study highlights the importance of ST8 as a prominent lineage of C. difficile with critical implications in the context of One Health. In addition, these findings strongly support the need for continued surveillance and investigation of non-human samples to gain a more comprehensive understanding of the epidemiology of C. difficile.}, } @article {pmid39048791, year = {2024}, author = {Garg, V and Bohra, A and Mascher, M and Spannagl, M and Xu, X and Bevan, MW and Bennetzen, JL and Varshney, RK}, title = {Unlocking plant genetics with telomere-to-telomere genome assemblies.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {39048791}, issn = {1546-1718}, abstract = {Contiguous genome sequence assemblies will help us to realize the full potential of crop translational genomics. Recent advances in sequencing technologies, especially long-read sequencing strategies, have made it possible to construct gapless telomere-to-telomere (T2T) assemblies, thus offering novel insights into genome organization and function. 
Plant genomes pose unique challenges, such as a continuum of ancient to recent polyploidy and abundant highly similar and long repetitive elements. Owing to progress in sequencing approaches, for most crop plants, chromosome-scale reference genome assemblies are available, but T2T assembly construction remains challenging. Here we describe methods for haplotype-resolved, gapless T2T assembly construction in plants, including various crop species. We outline the impact of T2T assemblies in elucidating the roles of repetitive elements in gene regulation, as well as in pangenomics, functional genomics, genome-assisted breeding and targeted genome manipulation. In conjunction with sequence-enriched germplasm repositories, T2T assemblies thus hold great promise for basic and applied plant sciences.}, } @article {pmid39046316, year = {2024}, author = {Hong, UVT and Tamiru-Oli, M and Hurgobin, B and Lewsey, MG}, title = {Genomic and Cell-Specific Regulation of Benzylisoquinoline Alkaloid Biosynthesis in Opium Poppy.}, journal = {Journal of experimental botany}, volume = {}, number = {}, pages = {}, doi = {10.1093/jxb/erae317}, pmid = {39046316}, issn = {1460-2431}, abstract = {Opium poppy is a crop of great commercial value as a source of several opium alkaloids for the pharmaceutical industries including morphine, codeine, thebaine, noscapine and papaverine. Most enzymes involved in benzylisoquinoline alkaloids (BIAs) biosynthesis in opium poppy have been functionally characterized, and opium poppy currently serves as a model system to study BIA metabolism in plants. BIA biosynthesis in opium poppy involves two biosynthetic gene clusters associated respectively with the morphine and noscapine branches. Recent reports have shown that genes in the same cluster are co-expressed, suggesting they might also be co-regulated. However, the transcriptional regulation of opium poppy BIA biosynthesis is not well studied. 
Opium poppy BIA biosynthesis involves three cell types associated with the phloem system: companion cells, sieve elements and laticifers. The transcripts and enzymes associated with BIA biosynthesis are distributed across cell types, requiring the translocation of key enzymes and pathway intermediates between cell types. Together, these suggest that the regulation of BIA biosynthesis in opium poppy is multilayered and complex, involving biochemical, genomic, and physiological mechanisms. In this review, we highlight recent advances in genome sequencing and single cell and spatial transcriptomics with a focus on how these efforts can improve our understanding of the genomic and cell-specific regulation of BIA biosynthesis. Such knowledge is vital for opium poppy genetic improvement and metabolic engineering efforts targeting the modulation of alkaloid yield and composition.}, } @article {pmid39044985, year = {2024}, author = {Chintakovid, N and Singkhamanan, K and Yaikhan, T and Nokchan, N and Wonglapsuwan, M and Jitpakdee, J and Kantachote, D and Surachat, K}, title = {Probiogenomic analysis of Lactiplantibacillus plantarum SPS109: A potential GABA-producing and cholesterol-lowering probiotic strain.}, journal = {Heliyon}, volume = {10}, number = {13}, pages = {e33823}, pmid = {39044985}, issn = {2405-8440}, abstract = {Lactiplantibacillus plantarum SPS109, an isolated strain of lactic acid bacteria (LAB) from fermented foods, showed remarkable potential as a probiotic with dual capabilities in γ-aminobutyric acid (GABA) production and cholesterol reduction. This study employs genomic and comparative analyses to search into the strain's genetic profile, safety features, and probiotic attributes. The safety assessment reveals the absence of virulence factors and antimicrobial resistance genes, while the genome uncovers bacteriocin-related elements, including sactipeptides and a cluster for putative plantaricins, strengthening its ability to combat diverse pathogens. 
Pangenome analysis revealed unique bacteriocin-related genes, specifically lcnD and bcrA, distinguishing SPS109 from four other L. plantarum strains producing GABA. In addition, genomic study emphasizes SPS109 strain distinctive features, two GABA-related genes responsible for GABA production and a bile tolerance gene (cbh) crucial for cholesterol reduction. Additionally, the analysis highlights several genes of potential probiotic properties, including stress tolerance, vitamin production, and antioxidant activity. In summary, L. plantarum SPS109 emerges as a promising probiotic candidate with versatile applications in the food and beverage industries, supported by its unique genomic features and safety profile.}, } @article {pmid39042999, year = {2024}, author = {L Rocha, J and Lou, RN and Sudmant, PH}, title = {Structural variation in humans and our primate kin in the era of telomere-to-telomere genomes and pangenomics.}, journal = {Current opinion in genetics \& development}, volume = {87}, number = {}, pages = {102233}, doi = {10.1016/j.gde.2024.102233}, pmid = {39042999}, issn = {1879-0380}, abstract = {Structural variants (SVs) account for the majority of base pair differences both within and between primate species. However, our understanding of inter- and intra-species SV has been historically hampered by the quality of draft primate genomes and the absence of genome resources for key taxa. Recently, advances in long-read sequencing and genome assembly have begun to radically reshape our understanding of SVs. Two landmark achievements include the publication of a human telomere-to-telomere (T2T) genome as well as the development of the first human pangenome reference. In this review, we first look back to the major works laying the foundation for these projects. We then examine the ways in which T2T genome assemblies and pangenomes are transforming our understanding of and approach to primate SV. 
Finally, we discuss what the future of primate SV research may look like in the era of T2T genomes and pangenomics.}, } @article {pmid39041615, year = {2024}, author = {Li, H and Marin, M and Farhat, MR}, title = {Exploring gene content with pangene graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae456}, pmid = {39041615}, issn = {1367-4811}, abstract = {MOTIVATION: The gene content regulates the biology of an organism. It varies between species and between individuals of the same species. Although tools have been developed to identify gene content changes in bacterial genomes, none is applicable to collections of large eukaryotic genomes such as the human pangenome.

RESULTS: We developed pangene, a computational tool to identify gene orientation, gene order and gene copy-number changes in a collection of genomes. Pangene aligns a set of input protein sequences to the genomes, resolves redundancies between protein sequences and constructs a gene graph with each genome represented as a walk in the graph. It additionally finds subgraphs, which we call bibubbles, that capture gene content changes. Applied to the human pangenome, pangene identifies known gene-level variations and reveals complex haplotypes that are not well studied before. Pangene also works with high-quality bacterial pangenome and reports similar numbers of core and accessory genes in comparison to existing tools.

Source code at https://github.com/lh3/pangene; pre-built pangene graphs can be downloaded from https://zenodo.org/records/8118576 and visualized at https://pangene.bioinweb.org.}, } @article {pmid39037482, year = {2024}, author = {Mazwi, KD and Lekota, KE and Glover, BA and Kolo, FB and Hassim, A and Rossouw, J and Jonker, A and Wojno, JM and Profiti, G and Martelli, PL and Casadio, R and Zilli, K and Janowicz, A and Marotta, F and Garofolo, G and van Heerden, H}, title = {Whole Genome Sequence Analysis of Brucella spp. from Human, Livestock, and Wildlife in South Africa.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {}, number = {}, pages = {}, pmid = {39037482}, issn = {1976-3794}, support = {98651//National Research Foundation/ ; }, abstract = {Brucellosis is an economically important zoonotic disease affecting humans, livestock, and wildlife health globally and especially in Africa. Brucella abortus and B. melitensis have been isolated from human, livestock (cattle and goat), and wildlife (sable) in South Africa (SA) but with little knowledge of the population genomic structure of this pathogen in SA. As whole genome sequencing can assist to differentiate and trace the origin of outbreaks of Brucella spp. strains, the whole genomes of retrospective isolates (n = 19) from previous studies were sequenced. Sequences were analysed using average nucleotide identity (ANI), pangenomics, and whole genome single nucleotide polymorphism (wgSNP) to trace the geographical origin of cases of brucellosis circulating in human, cattle, goats, and sable from different provinces in SA. Pangenomics analysis of B. melitensis (n = 69) and B. abortus (n = 56) was conducted with 19 strains that included B. abortus from cattle (n = 3) and B. melitensis from a human (n = 1), cattle (n = 1), goat (n = 1), Rev1 vaccine strain (n = 1), and sable (n = 12). Pangenomics analysis of B. 
melitensis genomes, highlighted shared genes, that include 10 hypothetical proteins and genes that encodes for acetyl-coenzyme A synthetase (acs), and acylamidase (aam) amongst the sable genomes. The wgSNP analysis confirmed the B. melitensis isolated from human was more closely related to the goat from the Western Cape Province from the same outbreak than the B. melitensis cattle sample from different cases in the Gauteng Province. The B. melitensis sable strains could be distinguished from the African lineage, constituting their own African sub-clade. The sequenced B. abortus strains clustered in the C2 lineage that is closely related to the isolates from Mozambique and Zimbabwe. This study identified genetically diverse Brucella spp. among various hosts in SA. This study expands the limited known knowledge regarding the presence of B. melitensis in livestock and humans in SA, further building a foundation for future research on the distribution of the Brucella spp. worldwide and its evolutionary background.}, } @article {pmid39035534, year = {2024}, author = {Gheorghe-Barbu, I and Surleac, M and Barbu, IC and Paraschiv, S and Bănică, LM and Rotaru, LI and Vrâncianu, CO and Niță Lazăr, M and Oțelea, D and Chifiriuc, MC}, title = {Decoding the resistome, virulome and mobilome of clinical versus aquatic Acinetobacter baumannii in southern Romania.}, journal = {Heliyon}, volume = {10}, number = {13}, pages = {e33372}, pmid = {39035534}, issn = {2405-8440}, abstract = {Acinetobacter baumannii, a notorious opportunistic pathogen, presents a formidable challenge in both clinical and environmental fields due to its resilience and ability to acquire resistance. This study undertook a comprehensive analysis of 183 A. 
baumannii isolates collected between 2019 and 2022 from intra-hospital infections (IHI), hospital sewages (Hs), wastewater treatment plants (WWTP), and adjacent river waters from two Southern cities, focusing on their resistome, virulome, and mobilome through isolation on chromogenic media, identification by MALDI-TOF-MS and antibiotic susceptibility testing by disk diffusion) followed by genotypic characterization [Whole Genome Sequencing (WGS), 3rd generation sequencing through the MinION (ONT) platform, pangenome description, and respectively horizontal gene transfer through conjugation assays]. Our findings reveal significant genomic plasticity and the prevalence of high-risk international clones, underlining the potential of these isolates to act as reservoirs for antibiotic resistance genes (ARGs) that could be dynamically exchanged between clinical and environmental settings through mobile genetic elements (MGEs) such as the pMAL1 plasmids and the critical role of WWTPs in the persistence and spread of A. baumannii. Moreover, our study presents the first report of the co-occurrence of bla OXA-23 and bla OXA-72 in A. baumannii ST2 clone. 
Thus, our research underscores the necessity for integrated surveillance and targeted interventions across healthcare and environmental sectors to mitigate the risk posed by this adaptable pathogen.}, } @article {pmid39033143, year = {2024}, author = {Xiang, Y and Zhu, K and Min, K and Zhang, Y and Liu, J and Liu, K and Han, Y and Li, X and Du, X and Wang, X and Huang, Y and Li, X and Peng, Y and Yang, C and Liu, H and Liu, H and Li, X and Wang, H and Wang, C and Wang, Q and Jia, H and Yang, M and Wang, L and Wu, Y and Cui, Y and Chen, F and Yang, H and Baker, S and Xu, X and Yang, J and Song, H and Qiu, S}, title = {Characterization of a Salmonella enterica serovar Typhimurium lineage with rough colony morphology and multidrug resistance.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {6123}, pmid = {39033143}, issn = {2041-1723}, support = {82173580//National Natural Science Foundation of China (National Science Foundation of China)/ ; 82202538//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, mesh = {*Salmonella typhimurium/genetics/drug effects/isolation & purification ; *Drug Resistance, Multiple, Bacterial/genetics ; *Phylogeny ; *Anti-Bacterial Agents/pharmacology ; *Whole Genome Sequencing ; *Biofilms/growth & development/drug effects ; China ; *Genome, Bacterial/genetics ; Plasmids/genetics ; Microbial Sensitivity Tests ; Humans ; Salmonella Infections/microbiology ; }, abstract = {Salmonella enterica serovar Typhimurium (S. Typhimurium) is a major cause of salmonellosis, and the emergence of multidrug-resistant pathovariants has become a growing concern. Here, we investigate a distinct rough colony variant exhibiting a strong biofilm-forming ability isolated in China. Whole-genome sequencing on 2,212 Chinese isolates and 1,739 publicly available genomes reveals the population structure and evolutionary history of the rough colony variants. 
Characterized by macro, red, dry, and rough (mrdar) colonies, these variants demonstrate enhanced biofilm formation at 28 °C and 37 °C compared to typical rdar colonies. The mrdar variants exhibit extensive multidrug resistance, with significantly higher resistance to at least five classes of antimicrobial agents compared to non-mrdar variants. This resistance is primarily conferred by an IncHI2 plasmid harboring 19 antimicrobial resistance genes. Phylogenomic analysis divides the global collections into six lineages. The majority of mrdar variants belong to sublineage L6.5, which originated from Chinese smooth colony strains and possibly emerged circa 1977. Among the mrdar variants, upregulation of the csgDEFG operons is observed, probably due to a distinct point mutation (-44G > T) in the csgD gene promoter. Pangenome and genome-wide association analyses identify 87 specific accessory genes and 72 distinct single nucleotide polymorphisms associated with the mrdar morphotype.}, } @article {pmid39029396, year = {2024}, author = {Fan, X and Kong, L and Wang, J and Tan, Y and Xu, X and Li, M and Zhu, L}, title = {Surface-programmed microbiome assembly in phycosphere to microplastics contamination.}, journal = {Water research}, volume = {262}, number = {}, pages = {122064}, doi = {10.1016/j.watres.2024.122064}, pmid = {39029396}, issn = {1879-2448}, abstract = {Recalcitrance in microplastics accounts for ubiquitous white pollution. Of special interest are the capabilities of microorganisms to accelerate their degradation sustainably. Compared to the well-studied pure cultures in degrading natural polymers, the algal-bacterial symbiotic system is considered as a promising candidate for microplastics removal, cascading bottom-up impacts on ecosystem-scale processes. This study selected and enriched the algae-associated microbial communities hosted by the indigenous isolation Desmodesmus sp. 
in wastewater treatment plants with micro-polyvinyl chloride, polyethylene terephthalate, polyethylene, and polystyrene contamination. Results elaborated that multiple settled and specific affiliates were recruited by the uniform algae protagonist from the biosphere under manifold microplastic stress. Alteration of distinct chemical functionalities and deformation of polymers provide direct evidence of degradation in phycosphere under illumination. Microplastic-induced phycosphere-derived DOM created spatial gradients of aromatic protein, fulvic and humic acid-like and tryptophan components to expanded niche-width. Surface thermodynamic analysis was conducted to simulate the reciprocal and reversible interaction on algal-bacterial and phycosphere-microplastic interface, revealing the enhancement of transition to stable and irreversible aggregation for functional microbiota colonization and microplastics capture. Furthermore, pangenomic analysis disclosed the genes related to the chemotaxis and the proposed microplastics biodegradation pathway in enriched algal-bacterial microbiome, orchestrating the evidence for common synthetic polymer particles and ultimately to confirm the effectiveness and potential. 
The present study emphasizes the necessity for future endeavors aimed at fully leveraging the potential of algal-bacterial mutualistic systems within sustainable bioremediation strategies targeting the eradication of microplastic waste.}, } @article {pmid39026826, year = {2024}, author = {Hatmaker, EA and Barber, AE and Drott, MT and Sauters, TJC and Alastruey-Izquierdo, A and Garcia-Hermoso, D and Kurzai, O and Rokas, A}, title = {Pathogenicity is associated with population structure in a fungal pathogen of humans.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.07.05.602241}, pmid = {39026826}, issn = {2692-8205}, abstract = {Aspergillus flavus is a clinically and agriculturally important saprotrophic fungus responsible for severe human infections and extensive crop losses. We analyzed genomic data from 250 (95 clinical and 155 environmental) A. flavus isolates from 9 countries, including 70 newly sequenced clinical isolates, to examine population and pan-genome structure and their relationship to pathogenicity. We identified five A. flavus populations, including a new population, D, corresponding to distinct clades in the genome-wide phylogeny. Strikingly, > 75% of clinical isolates were from population D. Accessory genes, including genes within biosynthetic gene clusters, were significantly more common in some populations but rare in others. Population D was enriched for genes associated with zinc ion binding, lipid metabolism, and certain types of hydrolase activity. In contrast to the major human pathogen Aspergillus fumigatus , A. 
flavus pathogenicity in humans is strongly associated with population structure, making it a great system for investigating how population-specific genes contribute to pathogenicity.}, } @article {pmid39019985, year = {2024}, author = {Kusza, S and Badaoui, B and Wanjala, G}, title = {Insights into the genomic homogeneity of Moroccan indigenous sheep breeds though the lens of runs of homozygosity.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {16515}, pmid = {39019985}, issn = {2045-2322}, support = {2021-1.2.4-TÉT-2021-00014//National Development, Research and Innovation Fund/ ; Hungarian Eötvös State Scholarship//Tempus Public Foundation/ ; Stipendium Hungaricum Scholarship//Tempus Public Foundation/ ; }, mesh = {Animals ; Morocco ; *Homozygote ; *Breeding ; Sheep/genetics ; Genomics/methods ; Genome ; Polymorphism, Single Nucleotide ; Genetics, Population ; Sheep, Domestic/genetics ; Whole Genome Sequencing ; }, abstract = {Numerous studies have indicated that Morocco's indigenous sheep breeds are genetically homogenous, posing a risk to their survival in the challenging harsh climate conditions where they predominantly inhabit. To understand the genetic behind genetic homogeneity through the lens of runs of homozygosity (ROH), we analyzed the whole genome sequences of five indigenous sheep breeds (Beni Guil, Ouled Djellal, D'man, Sardi, Timahdite and Admixed). The results from principal component, admixture, Fst, and neighbour joining tree analyses consistently showed a homogenous genetic structure. This structure was characterized by an average length of 1.83 Mb for runs of homozygosity (ROH) segments, with a limited number of long ROH segments (24-48 Mb and > 48 Mb). The most common ROH segments were those ranging from 1-6 Mb. The most significant regions of homozygosity (ROH Islands) were mostly observed in two chromosomes, namely Chr1 and Chr5. 
Specifically, ROH Islands were exclusively discovered in the Ouled Djellal breed on Chr1, whereas Chr5 exhibited ROH Islands in all breeds. The analysis of ROH Island and iHS technique was employed to detect signatures of selection on Chr1 and Chr5. The results indicate that Chr5 had a high level of homogeneity, with the same genes being discovered across all breeds. In contrast, Chr1 displays some genetic variances between breeds. Genes identified on Chr5 included SLC39A1, IL23A, CAST, IL5, IL13, and IL4 which are responsible for immune response while genes identified on Chr1 include SOD1, SLAMF9, RTP4, CLDN1, and PRKAA2. ROH segment profile and effective population sizes patterns suggests that the genetic uniformity of studied breeds is the outcome of events that transpired between 250 and 300 generations ago. This research not only contributes to the understanding of ROH distribution across breeds but helps design and implement native sheep breeding and conservation strategies in Morocco. 
Future research, incorporating a broader sample size and utilizing the pangenome for reference, is recommended to further elucidate these breeds' genomic landscapes and adaptive mechanisms.}, } @article {pmid39016539, year = {2024}, author = {Machado, E and Vasconcellos, S and Gomes, L and Catanho, M and Ramos, J and de Carvalho, L and Goldenberg, T and Redner, P and Caldas, P and Campos, C and Dalcolmo, M and Lourenço, MC and Lasunskaia, E and Mussi, V and Spinassé, L and Vinhas, S and Rigouts, L and Cogneau, S and de Rijk, P and Utpatel, C and Kaustova, J and van der Laan, T and de Neeling, H and Rastogi, N and Levina, K and Kütt, M and Mokrousov, I and Zhuravlev, V and Makhado, N and Žolnir-Dovč, M and Jankovic, V and de Waard, J and Sisco, MC and van Soolingen, D and Niemann, S and de Jong, BC and Meehan, CJ and Suffys, P}, title = {Phylogenomic and genomic analysis reveals unique and shared genetic signatures of Mycobacterium kansasii complex species.}, journal = {Microbial genomics}, volume = {10}, number = {7}, pages = {}, doi = {10.1099/mgen.0.001266}, pmid = {39016539}, issn = {2057-5858}, mesh = {*Mycobacterium kansasii/genetics/classification/isolation & purification ; *Phylogeny ; Humans ; *Genome, Bacterial ; *Mycobacterium Infections, Nontuberculous/microbiology ; *Genomics ; Animals ; Virulence/genetics ; }, abstract = {Species belonging to the Mycobacterium kansasii complex (MKC) are frequently isolated from humans and the environment and can cause serious diseases. The most common MKC infections are caused by the species M. kansasii (sensu stricto), leading to tuberculosis-like disease. However, a broad spectrum of virulence, antimicrobial resistance and pathogenicity of these non-tuberculous mycobacteria (NTM) are observed across the MKC. Many genomic aspects of the MKC that relate to these broad phenotypes are not well elucidated. 
Here, we performed genomic analyses from a collection of 665 MKC strains, isolated from environmental, animal and human sources. We inferred the MKC pangenome, mobilome, resistome, virulome and defence systems and show that the MKC species harbours unique and shared genomic signatures. High frequency of presence of prophages and different types of defence systems were observed. We found that the M. kansasii species splits into four lineages, of which three are lowly represented and mainly in Brazil, while one lineage is dominant and globally spread. Moreover, we show that four sub-lineages of this most distributed M. kansasii lineage emerged during the twentieth century. Further analysis of the M. kansasii genomes revealed almost 300 regions of difference contributing to genomic diversity, as well as fixed mutations that may explain the M. kansasii's increased virulence and drug resistance.}, } @article {pmid39013614, year = {2024}, author = {Prigozhin, DM and Sutherland, CA and Rangavajjhala, S and Krasileva, KV}, title = {Majority of the highly variable NLRs in maize share genomic location and contain additional target-binding domains.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {}, doi = {10.1094/MPMI-05-24-0047-FI}, pmid = {39013614}, issn = {0894-0282}, abstract = {Nucleotide-binding, Leucine Rich Repeat proteins (NLRs) are a major class of immune receptors in plants. NLRs include both conserved and rapidly evolving members, however their evolutionary trajectory in crops remains understudied. Availability of crop pan-genomes enables analysis of the recent events in the evolution of this highly complex gene family within domesticated species. Here, we investigated the NLR complement of 26 nested association mapping (NAM) founder lines of maize. We found that maize has just four main subfamilies containing rapidly evolving highly variable NLR (hvNLR) receptors. 
Curiously, three of these phylogenetically distinct hvNLR lineages are located in adjacent clusters on chromosome 10. Members of the same hvNLR clade show variable expression and methylation across lines and tissues, consistent with their rapid evolution. By combining sequence diversity analysis and AlphaFold2 computational structure prediction we predicted ligand binding sites in the hvNLRs. We also observed novel insertion domains in the LRR regions of two hvNLR subfamilies that likely contribute to target recognition. To make this analysis accessible, we created NLRCladeFinder, a Google Colaboratory notebook, that accepts any newly identified NLR sequence, places it in the evolutionary context of the maize pan-NLRome, and provides an updated clade alignment, phylogenetic tree, and sequence diversity information for the gene of interest.}, } @article {pmid39013594, year = {2024}, author = {Chandra, G and Gibney, D and Jain, C}, title = {Haplotype-aware sequence alignment to pangenome graphs.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.279143.124}, pmid = {39013594}, issn = {1549-5469}, abstract = {Modern pangenome graphs are built using haplotype-resolved genome assemblies. When mapping reads to a pangenome graph, prioritizing alignments that are consistent with the known haplotypes improves genotyping accuracy. However, the existing rigorous formulations for co-linear chaining and alignment problems do not consider the haplotype paths in a pangenome graph. This often leads to spurious read alignments to those paths that are unlikely recombinations of the known haplotypes. In this paper, we develop novel formulations and algorithms for sequence-to-graph alignment and chaining problems. Inspired by the genotype imputation models, we assume that a query sequence is an imperfect mosaic of reference haplotypes. Accordingly, we introduce a recombination penalty in the scoring functions for each haplotype switch. 
First, we solve haplotype-aware sequence-to-graph alignment in O(|Q||E||H|) time, where Q is the query sequence, E is the set of edges, and H is the set of haplotypes represented in the graph. To complement our solution, we prove that an algorithm significantly faster than O(|Q||E||H|) is impossible under the Strong Exponential Time Hypothesis (SETH). Second, we propose a haplotype-aware chaining algorithm that runs in O(|H|N log|H|N) time after graph preprocessing, where N is the count of input anchors. We then establish that a chaining algorithm significantly faster than O(|H|N) is impossible under SETH. As a proof-of-concept, we implemented our chaining algorithm in the Minichain aligner. By aligning sequences sampled from the human major histocompatibility complex (MHC) to a pangenome graph of 60 MHC haplotypes, we demonstrate that our algorithm achieves better consistency with ground-truth recombinations when compared to a haplotype-agnostic algorithm.}, } @article {pmid39014339, year = {2024}, author = {Uzuner, H and Paschen, A and Schadendorf, D and Köster, J}, title = {Orthanq: transparent and uncertainty-aware haplotype quantification with application in HLA-typing.}, journal = {BMC bioinformatics}, volume = {25}, number = {1}, pages = {240}, pmid = {39014339}, issn = {1471-2105}, support = {RTG 2535//Deutsche Forschungsgemeinschaft/ ; 70113455//Deutsche Krebshilfe/ ; }, abstract = {BACKGROUND: Identification of human leukocyte antigen (HLA) types from DNA-sequenced human samples is important in organ transplantation and cancer immunotherapy and remains a challenging task considering sequence homology and extreme polymorphism of HLA genes.

RESULTS: We present Orthanq, a novel statistical model and corresponding application for transparent and uncertainty-aware quantification of haplotypes. We utilize our approach to perform HLA typing while, for the first time, reporting uncertainty of predictions and transparently observing mutations beyond reported HLA types. Using 99 gold standard samples from 1000 Genomes, Illumina Platinum Genomes and Genome In a Bottle projects, we show that Orthanq can provide overall superior accuracy and shorter runtimes than state-of-the-art HLA typers.

CONCLUSIONS: Orthanq is the first approach that allows to directly utilize existing pangenome alignments and type all HLA loci. Moreover, it can be generalized for usages beyond HLA typing, e.g. for virus lineage quantification. Orthanq is available under https://orthanq.github.io .}, } @article {pmid39012166, year = {2024}, author = {Ceres, K and Zehr, JD and Murrell, C and Millet, JK and Sun, Q and McQueary, HC and Horton, A and Cazer, C and Sams, K and Reboul, G and Andreopoulos, WB and Mitchell, PK and Anderson, R and Franklin-Guild, R and Cronk, BD and Stanhope, BJ and Burbick, CR and Wolking, R and Peak, L and Zhang, Y and McDowall, R and Krishnamurthy, A and Slavic, D and Sekhon, Pk and Tyson, GH and Ceric, O and Stanhope, MJ and Goodman, LB}, title = {Evolutionary genomic analyses of canine E. coli infections identify a relic capsular locus associated with resistance to multiple classes of antimicrobials.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0035424}, doi = {10.1128/aem.00354-24}, pmid = {39012166}, issn = {1098-5336}, abstract = {UNLABELLED: Infections caused by antimicrobial-resistant Escherichia coli are the leading cause of death attributed to antimicrobial resistance (AMR) worldwide, and the known AMR mechanisms involve a range of functional proteins. Here, we employed a pan-genome wide association study (GWAS) approach on over 1,000 E. coli isolates from sick dogs collected across the US and Canada and identified a strong statistical association (empirical P < 0.01) of AMR, involving a range of antibiotics to a group 1 capsular (CPS) gene cluster. This cluster included genes under relaxed selection pressure, had several loci missing, and had pseudogenes for other key loci. Furthermore, this cluster is widespread in E. coli and Klebsiella clinical isolates across multiple host species. 
Earlier studies demonstrated that the octameric CPS polysaccharide export protein Wza can transmit macrolide antibiotics into the E. coli periplasm. We suggest that the CPS in question, and its highly divergent Wza, functions as an antibiotic trap, preventing antimicrobial penetration. We also highlight the high diversity of lineages circulating in dogs across all regions studied, the overlap with human lineages, and regional prevalence of resistance to multiple antimicrobial classes.

IMPORTANCE: Much of the human genomic epidemiology data available for E. coli mechanism discovery studies has been heavily biased toward shiga-toxin producing strains from humans and livestock. E. coli occupies many niches and produces a wide variety of other significant pathotypes, including some implicated in chronic disease. We hypothesized that since dogs tend to share similar strains with their owners and are treated with similar antibiotics, their pathogenic isolates will harbor unexplored AMR mechanisms of importance to humans as well as animals. By comparing over 1,000 genomes with in vitro antimicrobial susceptibility data from sick dogs across the US and Canada, we identified a strong multidrug resistance association with an operon that appears to have once conferred a type 1 capsule production system.}, } @article {pmid39012116, year = {2024}, author = {Jespersen, MG and Hayes, AJ and Tong, SYC and Davies, MR}, title = {Pangenome evaluation of gene essentiality in Streptococcus pyogenes.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0324023}, doi = {10.1128/spectrum.03240-23}, pmid = {39012116}, issn = {2165-0497}, abstract = {Bacterial species often consist of strains with variable gene content, collectively referred to as the pangenome. Variations in the genetic makeup of strains can alter bacterial physiology and fitness. To define biologically relevant genes of a genome, genome-wide transposon mutant libraries have been used to identify genes essential for survival or virulence in a given strain. Such phenotypic studies have been conducted in four different genotypes of the human pathogen Streptococcus pyogenes, yet challenges exist in comparing results across studies conducted in different genetic backgrounds and conditions. To advance genotype to phenotype inferences across different S. pyogenes strains, we built a pangenome database of 249 S. pyogenes reference genomes. 
We systematically re-analyzed publicly available transposon sequencing datasets from S. pyogenes using a transposon sequencing-specific analysis pipeline, Transit. Across four genetic backgrounds and nine phenotypic conditions, 355 genes were essential for survival, corresponding to ~24% of the core genome. Clusters of Orthologous Genes (COG) categories related to coenzyme and lipid transport and growth functions were overrepresented as essential. Finally, essential operons across S. pyogenes genotypes were defined, with an increased number of essential operons detected under in vivo conditions. This study provides an extendible database to which new studies can be added, and a searchable html-based resource to direct future investigations into S. pyogenes biology. IMPORTANCE: Streptococcus pyogenes is a human-adapted pathogen occupying restricted ecological niches. Understanding the essentiality of genes across different strains and experimental conditions is important to direct research questions and efforts to prevent the large burden of disease caused by S. pyogenes. To this end we systematically reanalyzed transposon sequencing studies in S. pyogenes using transposon sequencing-specific methods, integrating them into an extendible meta-analysis framework. This provides a repository of gene essentiality in S. pyogenes which was used to highlight specific genes of interest and for the community to guide future phenotypic studies.}, } @article {pmid39011297, year = {2024}, author = {Brejová, B and Gagie, T and Herencsárová, E and Vinař, T}, title = {Maximum-scoring path sets on pangenome graphs of constant treewidth.}, journal = {Frontiers in bioinformatics}, volume = {4}, number = {}, pages = {1391086}, pmid = {39011297}, issn = {2673-7647}, abstract = {We generalize a problem of finding maximum-scoring segment sets, previously studied by Csűrös (IEEE/ACM Transactions on Computational Biology and Bioinformatics, 2004, 1, 139-150), from sequences to graphs. 
Namely, given a vertex-weighted graph G and a non-negative startup penalty c, we can find a set of vertex-disjoint paths in G with maximum total score when each path's score is its vertices' total weight minus c. We call this new problem maximum-scoring path sets (MSPS). We present an algorithm that has a linear-time complexity for graphs with a constant treewidth. Generalization from sequences to graphs allows the algorithm to be used on pangenome graphs representing several related genomes and can be seen as a common abstraction for several biological problems on pangenomes, including searching for CpG islands, ChIP-seq data analysis, analysis of region enrichment for functional elements, or simple chaining problems.}, } @article {pmid39011009, year = {2024}, author = {Montecillo, JAV}, title = {Genomics of the Thermophilic Bacterium Thermosulfidibacter takaii Reveals Novel Lineage of Deep-Branching Bacterial Phylum.}, journal = {Indian journal of microbiology}, volume = {64}, number = {2}, pages = {762-772}, pmid = {39011009}, issn = {0046-8991}, abstract = {UNLABELLED: The thermophilic bacterium Thermosulfidibacter takaii is affiliated to the deep-branching bacterial lineage in the phylum Aquificota. However, the recent taxonomic study of the phylum Aquificota revealed that T. takaii has no specific association with the phylum. The fact that T. takaii is considered an important model organism for studying the evolution and kinetics of ancestral carbon metabolism pathways, its proper classification is therefore of significant interest. In this work, phylogenomics and comparative genomic analyses were employed to ascertain the taxonomic placement of T. takaii. Results from the phylogenetic analyses based on 16S rRNA gene and core genome sequences confirmed the exclusion of T. takaii from the phylum Aquificota and further revealed a phylum-level lineage for T. takaii. 
The analysis of conserved signature indels (CSIs) specific for the phylum Aquificota also supported the exclusion of T. takaii from the phylum. Pan-genome analysis of T. takaii along with the members of the closely related clade from the phylum Thermodesulfobacteriota revealed that T. takaii was indeed distinct, supporting its phylum-level placement. Furthermore, the presence of CSIs specific to T. takaii, and the results from the average nucleotide identity and average amino acid identity analyses, together with the unique characteristic of T. takaii also provided evidence supporting its assignment to a novel phylum. Based on these results, T. takaii is proposed to be transferred to a novel family, Thermosulfidibacteraceae fam. nov., of a novel order, Thermosulfidibacterales ord. nov., and a novel class, Thermosulfidibacteria classis nov., within a novel phylum Thermosulfidibacterota phyl. nov.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s12088-024-01214-9.}, } @article {pmid38648121, year = {2024}, author = {Rodenburg, SYA and de Ridder, D and Govers, F and Seidl, MF}, title = {Oomycete Metabolism Is Highly Dynamic and Reflects Lifestyle Adaptations.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {MPMI12230200R}, doi = {10.1094/MPMI-12-23-0200-R}, pmid = {38648121}, issn = {0894-0282}, abstract = {The selective pressure of pathogen-host symbiosis drives adaptations. How these interactions shape the metabolism of pathogens is largely unknown. Here, we use comparative genomics to systematically analyze the metabolic networks of oomycetes, a diverse group of eukaryotes that includes saprotrophs as well as animal and plant pathogens, with the latter causing devastating diseases with significant economic and/or ecological impacts. In our analyses of 44 oomycete species, we uncover considerable variation in metabolism that can be linked to lifestyle differences. Comparisons of metabolic gene content reveal that plant pathogenic oomycetes have a bipartite metabolism consisting of a conserved core and an accessory set. The accessory set can be associated with the degradation of defense compounds produced by plants when challenged by pathogens. Obligate biotrophic oomycetes have smaller metabolic networks, and taxonomically distantly related biotrophic lineages display convergent evolution by repeated gene losses in both the conserved as well as the accessory set of metabolisms. When investigating to what extent the metabolic networks in obligate biotrophs differ from those in hemibiotrophic plant pathogens, we observe that the losses of metabolic enzymes in obligate biotrophs are not random and that gene losses predominantly influence the terminal branches of the metabolic networks. 
Our analyses represent the first metabolism-focused comparison of oomycetes at this scale and will contribute to a better understanding of the evolution of oomycete metabolism in relation to lifestyle adaptation. Numerous oomycete species are devastating plant pathogens that cause major damage in crops and natural ecosystems. Their interactions with hosts are shaped by strong selection, but how selection affects adaptation of the primary metabolism to a pathogenic lifestyle is not yet well established. By pan-genome and metabolic network analyses of distantly related oomycete pathogens and their nonpathogenic relatives, we reveal considerable lifestyle- and lineage-specific adaptations. This study contributes to a better understanding of metabolic adaptations in pathogenic oomycetes in relation to lifestyle, host, and environment, and the findings will help in pinpointing potential targets for disease control. [Formula: see text] Copyright © 2024 The Author(s). This is an open access article distributed under the CC BY-NC-ND 4.0 International license.}, } @article {pmid39010229, year = {2024}, author = {Chang, T and Gavelis, GS and Brown, JM and Stepanauskas, R}, title = {Genomic representativeness and chimerism in large collections of SAGs and MAGs of marine prokaryoplankton.}, journal = {Microbiome}, volume = {12}, number = {1}, pages = {126}, pmid = {39010229}, issn = {2049-2618}, support = {827839//Simons Foundation/ ; 827839//Simons Foundation/ ; 827839//Simons Foundation/ ; 827839//Simons Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; }, abstract = {BACKGROUND: Single amplified genomes (SAGs) and metagenome-assembled genomes (MAGs) are the predominant sources of information about the coding potential of uncultured microbial lineages, but their strengths and limitations remain poorly understood. 
Here, we performed a direct comparison of two previously published collections of thousands of SAGs and MAGs obtained from the same, global environment.

RESULTS: We found that SAGs were less prone to chimerism and more accurately reflected the relative abundance and the pangenome content of microbial lineages inhabiting the epipelagic of the tropical and subtropical ocean, as compared to MAGs. SAGs were also better suited to link genome information with taxa discovered through 16S rRNA amplicon analyses. Meanwhile, MAGs had the advantage of more readily recovering genomes of rare lineages.

CONCLUSIONS: Our analyses revealed the relative strengths and weaknesses of the two most commonly used genome recovery approaches in environmental microbiology. These considerations, as well as the need for better tools for genome quality assessment, should be taken into account when designing studies and interpreting data that involve SAGs or MAGs. Video Abstract.}, } @article {pmid39007295, year = {2024}, author = {Bosi, E and Taviani, E and Avesani, A and Doni, L and Auguste, M and Oliveri, C and Leonessi, M and Martinez-Urtaza, J and Vetriani, C and Vezzulli, L}, title = {Pan-Genome Provides Insights into Vibrio Evolution and Adaptation to Deep-Sea Hydrothermal Vents.}, journal = {Genome biology and evolution}, volume = {16}, number = {7}, pages = {}, doi = {10.1093/gbe/evae131}, pmid = {39007295}, issn = {1759-6653}, support = {CN00000033//"National Biodiversity Future Center-NBFC/ ; 201728ZA49_002//PRIN 2017 "Emergence of virulence/ ; 2021 SGR 00526//Ministerio de Ciencia e Innovación/ ; //Generalitat de Catalunya/ ; //National Science Foundation/ ; 19-48623//OCE/ ; 19-51690//IOS/ ; 80NSSC21K0485/NASA/NASA/United States ; }, mesh = {*Vibrio/genetics ; *Hydrothermal Vents/microbiology ; *Genome, Bacterial ; *Phylogeny ; Evolution, Molecular ; Adaptation, Physiological/genetics ; Pacific Ocean ; }, abstract = {This study delves into the genomic features of 10 Vibrio strains collected from deep-sea hydrothermal vents in the Pacific Ocean, providing insights into their evolutionary history and ecological adaptations. Through sequencing and pan-genome analysis involving 141 Vibrio species, we found that deep-sea strains exhibit larger genomes with unique gene distributions, suggesting adaptation to the vent environment. 
The phylogenomic reconstruction of the investigated isolates revealed the presence of 2 main clades: The first is monophyletic, consisting exclusively of Vibrio alginolyticus, while the second forms a monophyletic clade comprising both Vibrio antiquarius and Vibrio diabolicus species, which were previously isolated from deep-sea vents. All strains carry virulence and antibiotic resistance genes related to those found in human pathogenic Vibrio species which may play a wider ecological role other than host infection in these environments. In addition, functional genomic analysis identified genes potentially related to deep-sea survival and stress response, alongside candidate genes encoding for novel antimicrobial agents. Ultimately, the pan-genome we generated represents a valuable resource for future studies investigating the taxonomy, evolution, and ecology of Vibrio species.}, } @article {pmid39004696, year = {2024}, author = {Seru, LV and Forde, TL and Roberto-Charron, A and Mavrot, F and Niu, YD and Kutz, SJ}, title = {Genomic characterization and virulence gene profiling of Erysipelothrix rhusiopathiae isolated from widespread muskox mortalities in the Canadian Arctic Archipelago.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {691}, pmid = {39004696}, issn = {1471-2164}, support = {NST-2122-0049//Polar Knowledge Canada/ ; Canada Research Chair in Arctic One Health (CRC-2020-00315)//Natural Sciences and Engineering Research Council of Canada/ ; }, mesh = {Arctic Regions ; *Erysipelothrix/genetics/pathogenicity/isolation & purification ; Canada ; Animals ; Virulence/genetics ; Genomics ; Genome, Bacterial ; Phylogeny ; Erysipelothrix Infections/microbiology ; Virulence Factors/genetics ; Genome-Wide Association Study ; Genomic Islands ; }, abstract = {BACKGROUND: Muskoxen are important ecosystem components and provide food, economic opportunities, and cultural well-being for Indigenous communities in the Canadian Arctic. 
Between 2010 and 2021, Erysipelothrix rhusiopathiae was isolated from carcasses of muskoxen, caribou, a seal, and an Arctic fox during multiple large scale mortality events in the Canadian Arctic Archipelago. A single strain ('Arctic clone') of E. rhusiopathiae was associated with the mortalities on Banks, Victoria and Prince Patrick Islands, Northwest Territories and Nunavut, Canada (2010-2017). The objectives of this study were to (i) characterize the genomes of E. rhusiopathiae isolates obtained from more recent muskox mortalities in the Canadian Arctic in 2019 and 2021; (ii) identify and compare common virulence traits associated with the core genome and mobile genetic elements (i.e. pathogenicity islands and prophages) among Arctic clone versus other E. rhusiopathiae genomes; and (iii) use pan-genome wide association studies (GWAS) to determine unique genetic contents of the Arctic clone that may encode virulence traits and that could be used for diagnostic purposes.

RESULTS: Phylogenetic analyses revealed that the newly sequenced E. rhusiopathiae isolates from Ellesmere Island, Nunavut (2021) also belong to the Arctic clone. Of 17 virulence genes analysed among 28 Arctic clone isolates, four genes - adhesin, rhusiopathiae surface protein-A (rspA), choline binding protein-B (cbpB) and CDP-glycerol glycerophosphotransferase (tagF) - had amino acid sequence variants unique to this clone when compared to 31 other E. rhusiopathiae genomes. These genes encode proteins that facilitate E. rhusiopathiae to attach to the host endothelial cells and form biofilms. GWAS analyses using Scoary found several unique genes to be overrepresented in the Arctic clone.

CONCLUSIONS: The Arctic clone of E. rhusiopathiae was associated with multiple muskox mortalities spanning over a decade and multiple Arctic islands with distances over 1000 km, highlighting the extent of its spatiotemporal spread. This clone possesses unique gene content, as well as amino acid variants in multiple virulence genes that are distinct from the other closely related E. rhusiopathiae isolates. This study establishes an essential foundation on which to investigate whether these differences are correlated with the apparent virulence of this specific clone through in vitro and in vivo studies.}, } @article {pmid39003966, year = {2024}, author = {Biswas, R and Swetha, RG and Basu, S and Roy, A and Ramaiah, S and Anbarasu, A}, title = {Designing multi-epitope vaccine against human cytomegalovirus integrating pan-genome and reverse vaccinology pipelines.}, journal = {Biologicals : journal of the International Association of Biological Standardization}, volume = {87}, number = {}, pages = {101782}, doi = {10.1016/j.biologicals.2024.101782}, pmid = {39003966}, issn = {1095-8320}, abstract = {Human cytomegalovirus (HCMV) is accountable for high morbidity in neonates and immunosuppressed individuals. Due to the high genetic variability of HCMV, current prophylactic measures are insufficient. In this study, we employed a pan-genome and reverse vaccinology approach to screen the target for efficient vaccine candidates. Four proteins, envelope glycoprotein M, UL41A, US23, and US28, were shortlisted based on cellular localization, high solubility, antigenicity, and immunogenicity. A total of 29 B-cell and 44 T-cell highly immunogenic and antigenic epitopes with high global population coverage were finalized using immunoinformatics tools and algorithms. Further, the epitopes that were overlapping among the finalized B-cell and T-cell epitopes were linked with suitable linkers to form various combinations of multi-epitopic vaccine constructs. 
Among 16 vaccine constructs, Vc12 was selected based on physicochemical and structural properties. The docking and molecular simulations of Vc12 were performed, which showed its high binding affinity (-23.35 kcal/mol) towards TLR4 due to intermolecular hydrogen bonds, salt bridges, and hydrophobic interactions, and there were only minimal fluctuations. Furthermore, Vc12 eliciting a good response was checked for its expression in Escherichia coli through in silico cloning and codon optimization, suggesting it to be a potent vaccine candidate.}, } @article {pmid39003441, year = {2024}, author = {Guguchkin, E and Kasianov, A and Belenikin, M and Zobkova, G and Kosova, E and Makeev, V and Karpulevich, E}, title = {Enhancing SNV identification in whole-genome sequencing data through the incorporation of known genetic variants into the minimap2 index.}, journal = {BMC bioinformatics}, volume = {25}, number = {1}, pages = {238}, pmid = {39003441}, issn = {1471-2105}, support = {075-15-2022-294//Ministry of Science and Higher Education of the Russian Federation/ ; 075-15-2022-294//Ministry of Science and Higher Education of the Russian Federation/ ; 075-15-2021-595//Megagrant from the Government of the Russian Federation/ ; }, mesh = {Humans ; *Whole Genome Sequencing/methods ; *Genome, Human ; *Genetic Variation/genetics ; High-Throughput Nucleotide Sequencing/methods ; Polymorphism, Single Nucleotide/genetics ; Sequence Alignment/methods ; Software ; Algorithms ; Genome-Wide Association Study/methods ; }, abstract = {MOTIVATION: Alignment of reads to a reference genome sequence is one of the key steps in the analysis of human whole-genome sequencing data obtained through Next-generation sequencing (NGS) technologies. The quality of the subsequent steps of the analysis, such as the results of clinical interpretation of genetic variants or the results of a genome-wide association study, depends on the correct identification of the position of the read as a result of its alignment. 
The amount of human NGS whole-genome sequencing data is constantly growing. There are a number of human genome sequencing projects worldwide that have resulted in the creation of large-scale databases of genetic variants of sequenced human genomes. Such information about known genetic variants can be used to improve the quality of alignment at the read alignment stage when analysing sequencing data obtained for a new individual, for example, by creating a genomic graph. While existing methods for aligning reads to a linear reference genome have high alignment speed, methods for aligning reads to a genomic graph have greater accuracy in variable regions of the genome. The development of a read alignment method that takes into account known genetic variants in the linear reference sequence index allows combining the advantages of both sets of methods.

RESULTS: In this paper, we present the minimap2_index_modifier tool, which enables the construction of a modified index of a reference genome using known single nucleotide variants and insertions/deletions (indels) specific to a given human population. The use of the modified minimap2 index improves variant calling quality without modifying the bioinformatics pipeline and without significant additional computational overhead. Using the PrecisionFDA Truth Challenge V2 benchmark data (for HG002 short-read data aligned to the GRCh38 linear reference (GCA_000001405.15) with parameters k = 27 and w = 14) it was demonstrated that the number of false negative genetic variants decreased by more than 9500, and the number of false positives decreased by more than 7000 when modifying the index with genetic variants from the Human Pangenome Reference Consortium.}, } @article {pmid39003298, year = {2024}, author = {Yang, X and Luo, S and Yang, S and Duoji, C and Wang, Q and Chen, Z and Yang, D and Yang, T and Wan, X and Yang, Y and Liu, T and Yang, Y}, title = {Chromosome-level genome assembly of Hippophae rhamnoides variety.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {776}, pmid = {39003298}, issn = {2052-4463}, mesh = {*Hippophae/genetics ; *Genome, Plant ; *Chromosomes, Plant/genetics ; Transcriptome ; Molecular Sequence Annotation ; }, abstract = {Fructus hippophae (Hippophae rhamnoides spp. mongolica×Hippophae rhamnoides sinensis), a hybrid variety of sea buckthorn that Hippophae rhamnoides spp. mongolica serves as the female parent and Hippophae rhamnoides sinensis serves as the male parent, is a traditional plant with great potentials of economic and medical values. Herein, we gained a chromosome-level genome of Fructus hippophae about 918.59 Mb, with the scaffolds N50 reaching 83.65 Mb. Then, we anchored 440 contigs with 97.17% of the total genome sequences onto 12 pseudochromosomes. 
Next, de-novo, homology and transcriptome assembly strategies were adopted for gene structure prediction. This predicted 36475 protein-coding genes, of which 36226 genes could be functionally annotated. Simultaneously, various strategies were used for quality assessment, both the complete BUSCO value (98.80%) and the mapping rate indicated the high assembly quality. Repetitive elements, which occupied 63.68% of the genome, and 1483600 bp of non-coding RNA were annotated. Here, we provide genomic information on female plants of a popular variety, which can provide data for pan-genomic construction of sea buckthorn and for the resolution of the mechanism of sex differentiation.}, } @article {pmid39000250, year = {2024}, author = {Gao, Z and Lu, Y and Chong, Y and Li, M and Hong, J and Wu, J and Wu, D and Xi, D and Deng, W}, title = {Beef Cattle Genome Project: Advances in Genome Sequencing, Assembly, and Functional Genes Discovery.}, journal = {International journal of molecular sciences}, volume = {25}, number = {13}, pages = {}, doi = {10.3390/ijms25137147}, pmid = {39000250}, issn = {1422-0067}, support = {202202AE090005//Major Science and Technology Projects in Yunnan Province/ ; 202401BD070001-008//Yunnan Provincial Agricultural Union Foundation/ ; XDYC-CYCX-2022-0029//"Xingdian Talent" Industry Innovation Talent Program in Yunnan Province/ ; }, mesh = {Animals ; Cattle/genetics ; *Genome ; Genomics/methods ; Breeding/methods ; Whole Genome Sequencing/methods ; Red Meat ; Quantitative Trait Loci ; }, abstract = {Beef is a major global source of protein, playing an essential role in the human diet. The worldwide production and consumption of beef continue to rise, reflecting a significant trend. However, despite the critical importance of beef cattle resources in agriculture, the diversity of cattle breeds faces severe challenges, with many breeds at risk of extinction. The initiation of the Beef Cattle Genome Project is crucial. 
By constructing a high-precision functional annotation map of their genome, it becomes possible to analyze the genetic mechanisms underlying important traits in beef cattle, laying a solid foundation for breeding more efficient and productive cattle breeds. This review details advances in genome sequencing and assembly technologies, iterative upgrades of the beef cattle reference genome, and its application in pan-genome research. Additionally, it summarizes relevant studies on the discovery of functional genes associated with key traits in beef cattle, such as growth, meat quality, reproduction, polled traits, disease resistance, and environmental adaptability. Finally, the review explores the potential of telomere-to-telomere (T2T) genome assembly, structural variations (SVs), and multi-omics techniques in future beef cattle genetic breeding. These advancements collectively offer promising avenues for enhancing beef cattle breeding and improving genetic traits.}, } @article {pmid38999668, year = {2024}, author = {Huang, A and Feng, S and Ye, Z and Zhang, T and Chen, S and Chen, C and Chen, S}, title = {Genome Assembly and Structural Variation Analysis of Luffa acutangula Provide Insights on Flowering Time and Ridge Development.}, journal = {Plants (Basel, Switzerland)}, volume = {13}, number = {13}, pages = {}, doi = {10.3390/plants13131828}, pmid = {38999668}, issn = {2223-7747}, abstract = {Luffa spp. is an important worldwide cultivated vegetable and medicinal plant from the Cucurbitaceae family. In this study, we report a high-quality chromosome-level genome of the high-generation inbred line SG261 of Luffa acutangula. The genomic sequence was determined by PacBio long reads, Hi-C sequencing reads, and 10× Genomics sequencing, with an assembly size of 739.82 Mb, contig N50 of 18.38 Mb, and scaffold N50 of 56.08 Mb. The genome of L. 
acutangula SG261 was predicted to contain 27,312 protein-coding genes and 72.56% repetitive sequences, of which long terminal repeats (LTRs) were an important form of repetitive sequences, accounting for 67.84% of the genome. Phylogenetic analysis reveals that L. acutangula evolved later than Luffa cylindrica, and Luffa is closely related to Momordica charantia. Comparing the genome of L. acutangula SG261 and L. cylindrica with PacBio data, 67,128 high-quality structural variations (SVs) and 55,978 presence-absence variations (PAVs) were identified in SG261, resulting in 2424 and 1094 genes with variation in the CDS region, respectively, and there are 287 identical genes affected by two different structural variation analyses. In addition, we found that the transcription factor FY (FLOWERING LOCUS Y) families had a large expansion in L. acutangula SG261 (flowering in the morning) compared to L. cylindrica (flowering in the afternoon), which may result in the early flowering time in L. acutangula SG261. 
This study provides valuable reference for the breeding of and pan-genome research into Luffa species.}, } @article {pmid38996470, year = {2024}, author = {Miga, KH}, title = {From complete genomes to pangenomes.}, journal = {American journal of human genetics}, volume = {111}, number = {7}, pages = {1265--1268}, doi = {10.1016/j.ajhg.2024.05.012}, pmid = {38996470}, issn = {1537-6605}, mesh = {Humans ; *Genome, Human ; *Genomics/methods ; Human Genetics/history ; }, abstract = {Highlighting the Distinguished Speakers Symposium on "The Future of Human Genetics and Genomics," this collection of articles is based on presentations at the ASHG 2023 Annual Meeting in Washington, DC, in celebration of all our field has accomplished in the past 75 years, since the founding of ASHG in 1948.}, } @article {pmid38995500, year = {2024}, author = {Barcia-Cruz, R and Balboa, S and Lema, A and Romalde, JL}, title = {Comparative genomics of Vibrio toranzoniae strains.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {}, number = {}, pages = {}, pmid = {38995500}, issn = {1618-1905}, support = {AGL2016-77539-R//Agencia Estatal de Investigación/ ; ED431C 2022/23//Consellería de Cultura, Educación e Ordenación Universitaria, Xunta de Galicia/ ; }, abstract = {Vibrio toranzoniae is a marine bacterium belonging to the Splendidus clade that was originally isolated from healthy clams in Galicia (NW Spain). Its isolation from different hosts and seawater indicated two lifestyles and wide geographical distribution. The aim of the present study was to determine the differences at the genomic level among six strains (4 isolated from clam and 2 from seawater) and to determine their phylogeny. For this purpose, whole genomes of the six strains were sequenced by different technologies including Illumina and PacBio, and the resulting sequences were corrected. Genomes were annotated and compared using different online tools. 
Furthermore, the study of core- and pan-genomes were examined, and the phylogeny was inferred. The content of the core genome ranged from 2953 to 2766 genes and that of the pangenome ranged from 6278 to 6132, depending on the tool used. Although the strains shared certain homology, with DDH values ranging from 77.10 to 82.30 and values of OrthoANI values higher than 97%, some differences were found related to motility, capsule synthesis, iron acquisition systems or mobile genetic elements. Phylogenetic analysis of the core genome did not reveal a differentiation of the strains according to their lifestyle (commensal or free-living), but that of the pangenome indicated certain geographical isolation in the same growing area. This study led to the reclassification of some isolates formerly described as V. toranzoniae and demonstrated the importance of cured deposited sequences to proper phylogenetic assignment.}, } @article {pmid38995188, year = {2024}, author = {Li, XY and Fang, XM and Jia, HT and Bai, JL and Su, J and Zhang, YQ and Yu, LY}, title = {Noviherbaspirillum album sp. nov., an airborne bacteria isolated from an urban area of Beijing, China.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {74}, number = {7}, pages = {}, doi = {10.1099/ijsem.0.006450}, pmid = {38995188}, issn = {1466-5034}, mesh = {*Phylogeny ; *RNA, Ribosomal, 16S/genetics ; Beijing ; *Base Composition ; *DNA, Bacterial/genetics ; *Fatty Acids/analysis ; *Ubiquinone ; *Bacterial Typing Techniques ; *Air Microbiology ; *Nucleic Acid Hybridization ; *Sequence Analysis, DNA ; *Phospholipids/analysis ; }, abstract = {A Gram-negative, ellipsoidal to short-rod-shaped, motile bacterium was isolated from Beijing's urban air. The isolate exhibited the closest kinship with Noviherbaspirillum aerium 122213-3[T], exhibiting 98.4 % 16S rRNA gene sequence similarity. 
Phylogenetic analyses based on 16S rRNA gene sequences and genomes showed that it clustered closely with N. aerium 122213-3[T], thus forming a distinct phylogenetic lineage within the genus Noviherbaspirillum. The average nucleotide identity and digital DNA-DNA hybridization values between strain I16B-00201[T] and N. aerium 122213-3[T] were 84.6 and 29.4 %, respectively. The respiratory ubiquinone was ubiquinone 8. The major fatty acids (>10 %) were summed feature 3 (C16:1ω6c/C16:1ω7c, 43.3 %), summed feature 8 (C18:1ω7c/C18:1ω6c, 15.9 %) and C12:0 (11.0 %). The polyamine profile showed putrescine as the predominant compound. The polar lipid profile consisted of diphosphatidylglycerol, phosphatidylglycerol, phosphatidylethanolamine, phosphatidylcholine, unknown lipids and unknown phosphatidylaminolipids. The phenotypic, phylogenetic and chemotaxonomic results consistently supported that strain I16B-00201[T] represented a novel species of the genus Noviherbaspirillum, for which the name Noviherbaspirillum album sp. nov. is proposed, with I16B-00201[T] (=CPCC 100848[T]=KCTC 52095[T]) designated as the type strain. Its DNA G+C content is 59.4 mol%. Pan-genome analysis indicated that some Noviherbaspirillum species possess diverse nitrogen and aromatic compound metabolism pathways, suggesting their potential value in pollutant treatment.}, } @article {pmid38993487, year = {2024}, author = {Schüler, MA and Riedel, T and Overmann, J and Daniel, R and Poehlein, A}, title = {Comparative genome analyses of clinical and non-clinical Clostridioides difficile strains.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1404491}, doi = {10.3389/fmicb.2024.1404491}, pmid = {38993487}, issn = {1664-302X}, abstract = {The pathogenic bacterium Clostridioides difficile is a worldwide health burden with increasing morbidity, mortality and antibiotic resistances. Therefore, extensive research efforts are made to unravel its virulence and dissemination. 
One crucial aspect for C. difficile is its mobilome, which for instance allows the spread of antibiotic resistance genes (ARG) or influence strain virulence. As a nosocomial pathogen, the majority of strains analyzed originated from clinical environments and infected individuals. Nevertheless, C. difficile can also be present in human intestines without disease development or occur in diverse environmental habitats such as puddle water and soil, from which several strains could already be isolated. We therefore performed comprehensive genome comparisons of closely related clinical and non-clinical strains to identify the effects of the clinical background. Analyses included the prediction of virulence factors, ARGs, mobile genetic elements (MGEs), and detailed examinations of the pan genome. Clinical-related trends were thereby observed. While no significant differences were identified in fundamental C. difficile virulence factors, the clinical strains carried more ARGs and MGEs, and possessed a larger accessory genome. Detailed inspection of accessory genes revealed higher abundance of genes with unknown function, transcription-associated, or recombination-related activity. Accessory genes of these functions were already highlighted in other studies in association with higher strain virulence. This specific trend might allow the strains to react more efficiently on changing environmental conditions in the human host such as emerging stress factors, and potentially increase strain survival, colonization, and strain virulence. These findings indicated an adaptation of the strains to the clinical environment. Further, implementation of the analysis results in pairwise genome comparisons revealed that the majority of these accessory genes were encoded on predicted MGEs, shedding further light on the mobile genome of C. difficile. 
We therefore encourage the inclusion of non-clinical strains in comparative analyses.}, } @article {pmid38990940, year = {2024}, author = {Zomer, A and Ingham, CJ and von Meijenfeldt, FAB and Escobar Doncel, Á and van de Kerkhof, GT and Hamidjaja, R and Schouten, S and Schertel, L and Müller, KH and Catón, L and Hahnke, RL and Bolhuis, H and Vignolini, S and Dutilh, BE}, title = {Structural color in the bacterial domain: The ecogenomics of a 2-dimensional optical phenotype.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {121}, number = {29}, pages = {e2309757121}, doi = {10.1073/pnas.2309757121}, pmid = {38990940}, issn = {1091-6490}, support = {40-43500-98-4102/435004516//ZonMw (Netherlands Organisation for Health Research and Development)/ ; 860125//EC | HORIZON EUROPE Framework Programme (Horizon Europe)/ ; 2110570//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; 722842//EC | HORIZON EUROPE Framework Programme (Horizon Europe)/ ; P2ZHP2_183998/SNSF_/Swiss National Science Foundation/Switzerland ; SNSF3//Isaac Newton Trust/ ; SNSF 40B1-0_198708/SNSF_/Swiss National Science Foundation/Switzerland ; 865694//EC | European Research Council (ERC)/ ; 101001637//EC | European Research Council (ERC)/ ; BB/V00364X/1//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; 390713860//Deutsche Forschungsgemeinschaft (DFG)/ ; }, mesh = {*Genome, Bacterial ; Phenotype ; Color ; Bacteria/genetics/metabolism ; Proteobacteria/genetics/metabolism ; Phylogeny ; Metagenome ; Genome-Wide Association Study ; Bacteroidetes/genetics/metabolism ; }, abstract = {Structural color is an optical phenomenon resulting from light interacting with nanostructured materials. Although structural color (SC) is widespread in the tree of life, the underlying genetics and genomics are not well understood. 
Here, we collected and sequenced a set of 87 structurally colored bacterial isolates and 30 related strains lacking SC. Optical analysis of colonies indicated that diverse bacteria from at least two different phyla (Bacteroidetes and Proteobacteria) can create two-dimensional packing of cells capable of producing SC. A pan-genome-wide association approach was used to identify genes associated with SC. The biosynthesis of uroporphyrin and pterins, as well as carbohydrate utilization and metabolism, was found to be involved. Using this information, we constructed a classifier to predict SC directly from bacterial genome sequences and validated it by cultivating and scoring 100 strains that were not part of the training set. We predicted that SC is widely distributed within gram-negative bacteria. Analysis of over 13,000 assembled metagenomes suggested that SC is nearly absent from most habitats associated with multicellular organisms except macroalgae and is abundant in marine waters and surface/air interfaces. 
This work provides a large-scale ecogenomics view of SC in bacteria and identifies microbial pathways and evolutionary relationships that underlie this optical phenomenon.}, } @article {pmid38990800, year = {2024}, author = {Perrier, M and Barber, AE}, title = {Unraveling the genomic diversity and virulence of human fungal pathogens through pangenomics.}, journal = {PLoS pathogens}, volume = {20}, number = {7}, pages = {e1012313}, doi = {10.1371/journal.ppat.1012313}, pmid = {38990800}, issn = {1553-7374}, mesh = {Humans ; Virulence/genetics ; *Genomics/methods ; *Fungi/genetics/pathogenicity ; *Genome, Fungal ; *Genetic Variation ; *Mycoses/microbiology/genetics ; }, } @article {pmid38987589, year = {2024}, author = {Seersholm, FV and Sjögren, KG and Koelman, J and Blank, M and Svensson, EM and Staring, J and Fraser, M and Pinotti, T and McColl, H and Gaunitz, C and Ruiz-Bedoya, T and Granehäll, L and Villegas-Ramirez, B and Fischer, A and Price, TD and Allentoft, ME and Iversen, AKN and Axelsson, T and Ahlström, T and Götherström, A and Storå, J and Kristiansen, K and Willerslev, E and Jakobsson, M and Malmström, H and Sikora, M}, title = {Repeated plague infections across six generations of Neolithic Farmers.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {38987589}, issn = {1476-4687}, abstract = {In the period between 5,300 and 4,900 calibrated years before present (cal. BP), populations across large parts of Europe underwent a period of demographic decline[1,2]. However, the cause of this so-called Neolithic decline is still debated. Some argue for an agricultural crisis resulting in the decline[3], others for the spread of an early form of plague[4]. Here we use population-scale ancient genomics to infer ancestry, social structure and pathogen infection in 108 Scandinavian Neolithic individuals from eight megalithic graves and a stone cist. 
We find that the Neolithic plague was widespread, detected in at least 17% of the sampled population and across large geographical distances. We demonstrate that the disease spread within the Neolithic community in three distinct infection events within a period of around 120 years. Variant graph-based pan-genomics shows that the Neolithic plague genomes retained ancestral genomic variation present in Yersinia pseudotuberculosis, including virulence factors associated with disease outcomes. In addition, we reconstruct four multigeneration pedigrees, the largest of which consists of 38 individuals spanning six generations, showing a patrilineal social organization. Lastly, we document direct genomic evidence for Neolithic female exogamy in a woman buried in a different megalithic tomb than her brothers. Taken together, our findings provide a detailed reconstruction of plague spread within a large patrilineal kinship group and identify multiple plague infections in a population dated to the beginning of the Neolithic decline.}, } @article {pmid38987396, year = {2024}, author = {Khan, A and Tian, R and Bean, SR and Yerka, M and Jiao, Y}, title = {Transcriptome and metabolome analyses reveal regulatory networks associated with nutrition synthesis in sorghum seeds.}, journal = {Communications biology}, volume = {7}, number = {1}, pages = {841}, pmid = {38987396}, issn = {2399-3642}, mesh = {*Sorghum/genetics/metabolism ; *Seeds/metabolism/genetics/growth & development ; *Metabolome ; *Transcriptome ; *Gene Expression Regulation, Plant ; Gene Regulatory Networks ; Gene Expression Profiling ; Endosperm/metabolism/genetics ; Starch/biosynthesis/metabolism ; Edible Grain/genetics/metabolism ; }, abstract = {Cereal seeds are vital for food, feed, and agricultural sustainability because they store and provide essential nutrients to human and animal food and feed systems. 
Unraveling molecular processes in seed development is crucial for enhancing cereal grain yield and quality. We analyze spatiotemporal transcriptome and metabolome profiles during sorghum seed development in the inbred line 'BTx623'. Morphological and molecular analyses identify the key stages of seed maturation, specifying starch biosynthesis onset at 5 days post-anthesis (dpa) and protein at 10 dpa. Transcriptome profiling from 1 to 25 dpa reveal dynamic gene expression pathways, shifting from cellular growth and embryo development (1-5 dpa) to cell division, fatty acid biosynthesis (5-25 dpa), and seed storage compounds synthesis in the endosperm (5-25 dpa). Network analysis identifies 361 and 207 hub genes linked to starch and protein synthesis in the endosperm, respectively, which will help breeders enhance sorghum grain quality. The availability of this data in the sorghum reference genome line establishes a baseline for future studies as new pangenomes emerge, which will consider copy number and presence-absence variation in functional food traits.}, } @article {pmid38977857, year = {2024}, author = {Zhang, Y and Zhao, M and Tan, J and Huang, M and Chu, X and Li, Y and Han, X and Fang, T and Tian, Y and Jarret, R and Lu, D and Chen, Y and Xue, L and Li, X and Qin, G and Li, B and Sun, Y and Deng, XW and Deng, Y and Zhang, X and He, H}, title = {Telomere-to-telomere Citrullus super-pangenome provides direction for watermelon breeding.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38977857}, issn = {1546-1718}, abstract = {To decipher the genetic diversity within the cucurbit genus Citrullus, we generated telomere-to-telomere (T2T) assemblies of 27 distinct genotypes, encompassing all seven Citrullus species. This T2T super-pangenome has expanded the previously published reference genome, T2T-G42, by adding 399.2 Mb and 11,225 genes. 
Comparative analysis has unveiled gene variants and structural variations (SVs), shedding light on watermelon evolution and domestication processes that enhanced attributes such as bitterness and sugar content while compromising disease resistance. Multidisease-resistant loci from Citrullus amarus and Citrullus mucosospermus were successfully introduced into cultivated Citrullus lanatus. The SVs identified in C. lanatus have not only been inherited from cordophanus but also from C. mucosospermus, suggesting additional ancestors beyond cordophanus in the lineage of cultivated watermelon. Our investigation substantially improves the comprehension of watermelon genome diversity, furnishing comprehensive reference genomes for all Citrullus species. This advancement aids in the exploration and genetic enhancement of watermelon using its wild relatives.}, } @article {pmid38977308, year = {2024}, author = {Vakirlis, N and Kupczok, A}, title = {Large-scale investigation of species-specific orphan genes in the human gut microbiome elucidates their evolutionary origins.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278977.124}, pmid = {38977308}, issn = {1549-5469}, abstract = {Species-specific genes, also known as orphans, are ubiquitous across life's domains. In prokaryotes, species-specific orphan genes (SSOGs) are mostly thought to originate in external elements such as viruses followed by horizontal gene transfer, whereas the scenario of native origination, through rapid divergence or de novo, is mostly dismissed. However, quantitative evidence supporting either scenario is lacking. Here, we systematically analyzed genomes from 4644 human gut microbiome species and identified more than 600,000 unique SSOGs, representing an average of 2.6% of a given species' pangenome. These sequences are mostly rare within each species yet show signs of purifying selection. 
Overall, SSOGs use optimal codons less frequently, and their proteins are more disordered than those of conserved genes (i.e., non-SSOGs). Importantly, across species, the GC content of SSOGs closely matches that of conserved ones. In contrast, the ∼5% of SSOGs that share similarity to known viral sequences have distinct characteristics, including lower GC content. Thus, SSOGs with similarity to viruses differ from the remaining SSOGs, contrasting an external origination scenario for most of them. By examining the orthologous genomic region in closely related species, we show that a small subset of SSOGs likely evolved natively de novo and find that these genes also differ in their properties from the remaining SSOGs. Our results challenge the notion that external elements are the dominant source of prokaryotic genetic novelty and will enable future studies into the biological role and relevance of species-specific genes in the human gut.}, } @article {pmid38974320, year = {2024}, author = {Lee, J and Hunter, B and Shim, H}, title = {A pangenome analysis of ESKAPE bacteriophages: the underrepresentation may impact machine learning models.}, journal = {Frontiers in molecular biosciences}, volume = {11}, number = {}, pages = {1395450}, pmid = {38974320}, issn = {2296-889X}, abstract = {Bacteriophages are the most prevalent biological entities in the biosphere. However, limitations in both medical relevance and sequencing technologies have led to a systematic underestimation of the genetic diversity within phages. This underrepresentation not only creates a significant gap in our understanding of phage roles across diverse biosystems but also introduces biases in computational models reliant on these data for training and testing. In this study, we focused on publicly available genomes of bacteriophages infecting high-priority ESKAPE pathogens to show the extent and impact of this underrepresentation. 
First, we demonstrate a stark underrepresentation of ESKAPE phage genomes within the public genome and protein databases. Next, a pangenome analysis of these ESKAPE phages reveals extensive sharing of core genes among phages infecting the same host. Furthermore, genome analyses and clustering highlight close nucleotide-level relationships among the ESKAPE phages, raising concerns about the limited diversity within current public databases. Lastly, we uncover a scarcity of unique lytic phages and phage proteins with antimicrobial activities against ESKAPE pathogens. This comprehensive analysis of the ESKAPE phages underscores the severity of underrepresentation and its potential implications. This lack of diversity in phage genomes may restrict the resurgence of phage therapy and cause biased outcomes in data-driven computational models due to incomplete and unbalanced biological datasets.}, } @article {pmid38967823, year = {2024}, author = {Lu, YT and Wu, YY and Li, YN and Zheng, WY and Liu, WZ}, title = {Saccharopolyspora mangrovi sp. nov., a novel mangrove soil actinobacterium with distinct metabolic potential revealed by comparative genomic analysis.}, journal = {Archives of microbiology}, volume = {206}, number = {8}, pages = {342}, pmid = {38967823}, issn = {1432-072X}, support = {32202121//National Natural Science Foundation of China/ ; }, mesh = {*Soil Microbiology ; *Phylogeny ; *Saccharopolyspora/genetics/metabolism/classification ; *RNA, Ribosomal, 16S/genetics ; *Genome, Bacterial ; *DNA, Bacterial/genetics ; Multigene Family ; Genomics ; Sequence Analysis, DNA ; Wetlands ; Nucleic Acid Hybridization ; Bacterial Typing Techniques ; }, abstract = {A novel mangrove soil-derived actinomycete, strain S2-29[T], was found to be most closely related to Saccharopolyspora karakumensis 5K548[T] based on 16S rRNA sequence (99.24% similarity) and genomic phylogenetic analyses. 
However, significant divergence in digital DNA-DNA hybridization, average nucleotide identity, and unique biosynthetic gene cluster possession distinguished S2-29[T] as a distinct Saccharopolyspora species. Pan genome evaluation revealed exceptional genomic flexibility in genus Saccharopolyspora, with > 95% accessory genome content. Strain S2-29[T] harbored 718 unique genes, largely implicated in energetic metabolisms, indicating different metabolic capacities from its close relatives. Several uncharacterized biosynthetic gene clusters in strain S2-29[T] highlighted the strain's untapped capacity to produce novel functional compounds with potential biotechnological applications. Designation as novel species Saccharopolyspora mangrovi sp. nov. (type strain S2-29[T] = JCM 34548[T] = CGMCC 4.7716[T]) was warranted, expanding the known Saccharopolyspora diversity and ecology. The discovery of this mangrove-adapted strain advances understanding of the genus while highlighting an untapped source of chemical diversity.}, } @article {pmid38966864, year = {2024}, author = {Wang, Y and Ding, K and Li, H and Kuang, Y and Liang, Z}, title = {Biography of Vitis genomics: recent advances and prospective.}, journal = {Horticulture research}, volume = {11}, number = {7}, pages = {uhae128}, doi = {10.1093/hr/uhae128}, pmid = {38966864}, issn = {2662-6810}, abstract = {The grape genome is the basis for grape studies and breeding, and is also important for grape industries. In the last two decades, more than 44 grape genomes have been sequenced. Based on these genomes, researchers have made substantial progress in understanding the mechanism of biotic and abiotic resistance, berry quality formation, and breeding strategies. In addition, this work has provided essential data for future pangenome analyses. Apart from de novo assembled genomes, more than six whole-genome sequencing projects have provided datasets comprising almost 5000 accessions. 
Based on these datasets, researchers have explored the domestication and origins of the grape and clarified the gene flow that occurred during its dispersed history. Moreover, genome-wide association studies and other methods have been used to identify more than 900 genes related to resistance, quality, and developmental phases of grape. These findings have benefited grape studies and provide some basis for smart genomic selection breeding. Moreover, the grape genome has played a great role in grape studies and the grape industry, and the importance of genomics will increase sharply in the future.}, } @article {pmid38965152, year = {2024}, author = {Sundaresan, AK and Gangwar, J and Murugavel, A and Malli Mohan, GB and Ramakrishnan, J}, title = {Complete genome sequence, phenotypic correlation and pangenome analysis of uropathogenic Klebsiella spp.}, journal = {AMB Express}, volume = {14}, number = {1}, pages = {78}, pmid = {38965152}, issn = {2191-0855}, abstract = {Urinary tract infections (UTI) by antibiotic resistant and virulent K. pneumoniae are a growing concern. Understanding the genome and validating the genomic profile along with pangenome analysis will facilitate surveillance of high-risk clones of K. pneumoniae to underpin management strategies toward early detection. The present study aims to correlate resistome with phenotypic antimicrobial resistance and virulome with pathogenicity in Klebsiella spp. The present study aimed to perform complete genome sequences of Klebsiella spp. and to analyse the correlation of resistome with phenotypic antimicrobial resistance and virulome with pathogenicity. To understand the resistome, pangenome and virulome in the Klebsiella spp, the ResFinder, CARD, IS Finder, PlasmidFinder, PHASTER, Roary, VFDB were used. The phenotypic susceptibility profiling identified the uropathogenic kp3 to exhibit multi drug resistance. 
The resistome and in vitro antimicrobial profiling showed concordance with all the tested antibiotics against the study strains. Hypermucoviscosity was not observed for any of the test isolates; this phenotypic character matches perfectly with the absence of rmpA and magA genes. To the best of our knowledge, this is the first report on the presence of ste, stf, stc and sti major fimbrial operons of Salmonella enterica serotype Typhimurium in K. pneumoniae genome. The study identifies the discordance of virulome and virulence in Klebsiella spp. The complete genome analysis and phenotypic correlation identify uropathogenic K. pneumoniae kp3 as a carbapenem-resistant and virulent pathogen. The Pangenome of K. pneumoniae was open suggesting high genetic diversity. Diverse K serotypes were observed. Sequence typing reveals the prevalence of K. pneumoniae high-risk clones in UTI catheterised patients. The study also highlights the concordance of resistome and in vitro susceptibility tests. Importantly, the study identifies the necessity of virulome and phenotypic virulence markers for timely diagnosis and immediate treatment for the management of high-risk K. pneumoniae clones.}, } @article {pmid38962716, year = {2024}, author = {Li, X and Dai, X and He, H and Lv, Y and Yang, L and He, W and Liu, C and Wei, H and Liu, X and Yuan, Q and Wang, X and Wang, T and Zhang, B and Zhang, H and Chen, W and Leng, Y and Yu, X and Qian, H and Zhang, B and Guo, M and Zhang, Z and Shi, C and Zhang, Q and Cui, Y and Xu, Q and Cao, X and Chen, D and Zhou, Y and Qian, Q and Shang, L}, title = {A pan-TE map highlights transposable elements underlying domestication and agronomic traits in Asian rice.}, journal = {National science review}, volume = {11}, number = {6}, pages = {nwae188}, pmid = {38962716}, issn = {2053-714X}, abstract = {Transposable elements (TEs) are ubiquitous genomic components and hard to study due to being highly repetitive. 
Here we assembled 232 chromosome-level genomes based on long-read sequencing data. Coupling the 232 genomes with 15 existing assemblies, we developed a pan-TE map comprising both cultivated and wild Asian rice. We detected 177 084 high-quality TE variations and inferred their derived state using outgroups. We found TEs were one source of phenotypic variation during rice domestication and differentiation. We identified 1246 genes whose expression variation was associated with TEs but not single-nucleotide polymorphisms (SNPs), such as OsRbohB, and validated OsRbohB's relative expression activity using a dual-Luciferase (LUC) reporter assays system. Our pan-TE map allowed us to detect multiple novel loci associated with agronomic traits. Collectively, our findings highlight the contributions of TEs to domestication, differentiation and agronomic traits in rice, and there is massive potential for gene cloning and molecular breeding by the high-quality Asian pan-TE map we generated.}, } @article {pmid38962128, year = {2024}, author = {Zhang, B and Ren, H and Wang, X and Han, C and Jin, Y and Hu, X and Shi, R and Li, C and Wang, Y and Li, Y and Lu, S and Liu, Z and Hu, P}, title = {Comparative genomics analysis to explore the biodiversity and mining novel target genes of Listeria monocytogenes strains from different regions.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1424868}, pmid = {38962128}, issn = {1664-302X}, abstract = {As a common foodborne pathogen, infection with L. monocytogenes poses a significant threat to human life and health. The objective of this study was to employ comparative genomics to unveil the biodiversity and evolutionary characteristics of L. monocytogenes strains from different regions, screening for potential target genes and mining novel target genes, thus providing significant reference value for the specific molecular detection and therapeutic targets of L. monocytogenes strains. 
Pan-genomic analysis revealed that L. monocytogenes from different regions have open genomes, providing a solid genetic basis for adaptation to different environments. These strains contain numerous virulence genes that contribute to their high pathogenicity. They also exhibit relatively high resistance to phosphonic acid, glycopeptide, lincosamide, and peptide antibiotics. The results of mobile genetic elements indicate that, despite being located in different geographical locations, there is a certain degree of similarity in bacterial genome evolution and adaptation to specific environmental pressures. The potential target genes identified through pan-genomics are primarily associated with the fundamental life activities and infection invasion of L. monocytogenes, including known targets such as inlB, which can be utilized for molecular detection and therapeutic purposes. After screening a large number of potential target genes, we further screened them using hub gene selection methods to mining novel target genes. The present study employed eight different hub gene screening methods, ultimately identifying ten highly connected hub genes (bglF_1, davD, menE_1, tilS, dapX, iolC, gshAB, cysG, trpA, and hisC), which play crucial roles in the pathogenesis of L. monocytogenes. The results of pan-genomic analysis showed that L. monocytogenes from different regions exhibit high similarity in bacterial genome evolution. The PCR results demonstrated the excellent specificity of the bglF_1 and davD genes for L. monocytogenes. Therefore, the bglF_1 and davD genes hold promise as specific molecular detection and therapeutic targets for L. 
monocytogenes strains from different regions.}, } @article {pmid38960860, year = {2024}, author = {Heumos, S and Guarracino, A and Schmelzle, JM and Li, J and Zhang, Z and Hagmann, J and Nahnsen, S and Prins, P and Garrison, E}, title = {Pangenome graph layout by Path-Guided Stochastic Gradient Descent.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae363}, pmid = {38960860}, issn = {1367-4811}, abstract = {MOTIVATION: The increasing availability of complete genomes demands for models to study genomic variability within entire populations. Pangenome graphs capture the full genomic similarity and diversity between multiple genomes. In order to understand them, we need to see them. For visualization, we need a human readable graph layout: A graph embedding in low (e.g. two) dimensional depictions. Due to a pangenome graph's potential excessive size, this is a significant challenge.

RESULTS: In response, we introduce a novel graph layout algorithm: the Path-Guided Stochastic Gradient Descent (PG-SGD). PG-SGD uses the genomes, represented in the pangenome graph as paths, as an embedded positional system to sample genomic distances between pairs of nodes. This avoids the quadratic cost seen in previous versions of graph drawing by Stochastic Gradient Descent (SGD). We show that our implementation efficiently computes the low dimensional layouts of gigabase-scale pangenome graphs, unveiling their biological features.

AVAILABILITY: We integrated PG-SGD in ODGI which is released as free software under the MIT open source license. Source code is available at https://github.com/pangenome/odgi.}, } @article {pmid38955273, year = {2024}, author = {Tong, W and Yang, D and Qiu, S and Tian, S and Ye, Z and Yang, S and Yan, L and Li, W and Li, N and Pei, X and Sun, Z and Liu, C and Peng, S and Li, Y and Wang, Q and Peng, Z}, title = {Relevance of genetic causes and environmental adaptation of Cronobacter spp. isolated from infant and follow-up formula production factories and retailed products in China: A 7-year period of continuous surveillance based on genome-wide analysis.}, journal = {The Science of the total environment}, volume = {}, number = {}, pages = {174368}, doi = {10.1016/j.scitotenv.2024.174368}, pmid = {38955273}, issn = {1879-1026}, abstract = {The possible contamination routes, environmental adaptation, and genetic basis of Cronobacter spp. in infant and follow-up formula production factories and retailed products in mainland China have been determined by laboratory studies and whole-genome comparative analysis in a 7-year nationwide continuous surveillance spanning from 2012 to 2018. The 2-year continuous multicenter surveillance of the production process (conducted in 2013 and 2014) revealed that the source of Cronobacter spp. in the dry-blending process was the raw dry ingredients and manufacturing environment (particularly in the vibro sieve and vacuum cleaner), while in the combined process, the main contamination source was identified as the packing room. It is important to note that, according to the contamination control knowledge obtained from the production process surveillance, the contamination rate of retail powdered infant formula (PIF) and follow-up formula (FUF) products in China decreased significantly from 2016 onward, after improving the hygiene management practices in factories. The prevalence of Cronobacter spp. 
in retailed PIF and FUF in China in 2018 was dramatically reduced from 1.55 % (61/3925, in 2012) to an average as low as 0.17 % (13/7655 in 2018). Phenotype determination and genomic analysis were performed on a total of 90 Cronobacter spp. isolates obtained from the surveillance. Of the 90 isolates, only two showed resistance to either cefazolin or cefoxitin. The multilocus sequence typing results revealed that C. sakazakii sequence type 1 (ST1), ST37, and C. malonaticus ST7 were the dominant sequence types (STs) collected from the production factories, while C. sakazakii ST1, ST4, ST64, and ST8 were the main STs detected in the retailed PIF and FUF nationwide. One C. sakazakii ST4 isolate (1.1 %, 1/90) had strong biofilm-forming ability and 13 isolates (14.4 %, 13/90) had weak biofilm-forming ability. Genomic analysis revealed that Cronobacter spp. have a relatively stable core-genome and an increasing pan-genome size. Plasmid IncFIB (pCTU3) was prevalent in this genus and some contained 14 antibacterial biocide- and metal-resistance genes (BMRGs) including copper, silver, and arsenic resistant genes. Plasmid IncN_1 was predicted to contain 6 ARGs. This is the first time that a multi-drug resistance IncN_1 type plasmid has been reported in Cronobacter spp. Genomic variations with respect to BMRGs, virulence genes, antimicrobial resistance genes (ARGs), and genes involved in biofilm formation were observed among strains of this genus. There were apparent differences in copies of bcsG and flgJ between the biofilm-forming group and non-biofilm-forming group, indicating that these two genes play key roles in biofilm formation. The findings of this study have improved our understanding of the contamination characteristics and genetic basis of Cronobacter spp. 
in PIF and FUF and their production environment in China and provide important guidance to reduce contamination with this pathogen during the production of PIF and FUF.}, } @article {pmid38955887, year = {2024}, author = {Dahiya, P and Kumar, P and Rani, S and Dang, AS and Suneja, P}, title = {Comparative Genomic and Functional Analyses for Insights into Pantoea agglomerans Strains Adaptability in Diverse Ecological Niches.}, journal = {Current microbiology}, volume = {81}, number = {8}, pages = {254}, pmid = {38955887}, issn = {1432-0991}, mesh = {*Pantoea/genetics/physiology/classification ; *Genome, Bacterial ; *Phylogeny ; Genomics ; Adaptation, Physiological ; Humans ; Animals ; Plants/microbiology ; Bacterial Proteins/genetics/metabolism ; }, abstract = {Pantoea agglomerans inhabit diverse ecological niches, ranging from epiphytes and endophytes in plants, body of animals, and occasionally in the human system. This multifaceted bacterium contributes substantially to plant growth promotion, stress resilience, and biocontrol but can also act as a pathogen to its host. The genetic determinants underlying these diverse functions remain largely unfathomed and to uncover this phenomenon, nineteen strains of Pantoea agglomerans were selected and analyzed. Genome-to-Genome Distance Calculator (GGDC) which uses the Genome Blast Distance Phylogeny (GBDP) technique to calculate digital DDH values. Phylogenetic analysis via Genome-to-Genome distance, Average Nucleotide Identity, and Amino Acid Identity calculation revealed that all strains belonged to the genus Pantoea. However, strain 33.1 had a lower value than the threshold for the same species delineation. Bacterial Pan Genome Analysis (BPGA) Pipeline and MinPath analysis revealed genetic traits associated with environmental resilience, such as oxidative stress, UV radiation, temperature extremes, and metabolism of distinct host-specific carbohydrates. 
Protein-protein interactome analysis illustrated osmotic stress proteins closely linked with core proteins, while heavy metal tolerance, nitrogen metabolism, and Type III and VI secretion systems proteins generally associated with pathogenicity formed a separate network, indicating strain-specific characteristics. These findings shed new light on the intricate genetic architecture of Pantoea agglomerans, revealing its adaptability to inhabit diverse niches and thrive in varied environments.}, } @article {pmid38952705, year = {2024}, author = {Shchyogolev, SY and Burygin, GL and Dykman, LA and Matora, LY}, title = {Phylogenetic and pangenomic analyses of members of the family Micrococcaceae related to a plant-growth-promoting rhizobacterium isolated from the rhizosphere of potato (Solanum tuberosum L.).}, journal = {Vavilovskii zhurnal genetiki i selektsii}, volume = {28}, number = {3}, pages = {308--316}, doi = {10.18699/vjgb-24-35}, pmid = {38952705}, issn = {2500-0462}, abstract = {We report the results of taxonomic studies on members of the family Micrococcaceae that, according to the 16S rRNA, internal transcribed spacer 1 (ITS1), average nucleotide identity (ANI), and average amino acid identity (AAI) tests, are related to Kocuria rosea strain RCAM04488, a plant-growth-promoting rhizobacterium (PGPR) isolated from the rhizosphere of potato (Solanum tuberosum L.). In these studies, we used whole-genome phylogenetic tests and pangenomic analysis. According to the ANI > 95 % criterion, several known members of K. salina, K. polaris, and K. rosea (including K. rosea type strain ATCC 186T) that are related most closely to isolate RCAM04488 in the ITS1 test should be assigned to the same species with appropriate strain verification. 
However, these strains were isolated from strongly contrasting ecological and geographical habitats, which could not but affect their genotypes and phenotypes and which should be taken into account in evaluation of their systematic position. This contradiction was resolved by a pangenomic analysis, which showed that the strains differed strongly in the number of accessory and strain-specific genes determining their individuality and possibly their potential for adaptation to different ecological niches. Similar results were obtained in a full-scale AAI test against the UniProt database (about 250 million records), by using the AAI-profiler program and the proteome of K. rosea strain ATCC 186T as a query. According to the AAI > 65 % criterion, members of the genus Arthrobacter and several other genera belonging to the class Actinomycetes, with a very wide geographical and ecological range of sources of isolation, should be placed into the same genus as Kocuria. Within the paradigm with vertically inherited phylogenetic markers, this could be regarded as a signal for their following taxonomic reclassification. 
An important factor in this case may be the detailing of the gene composition of the strains and the taxonomic ratios resulting from analysis of the pangenomes of the corresponding clades.}, } @article {pmid38951917, year = {2024}, author = {Niu, J and Wang, W and Wang, Z and Chen, Z and Zhang, X and Qin, Z and Miao, L and Yang, Z and Xie, C and Xin, M and Peng, H and Yao, Y and Liu, J and Ni, Z and Sun, Q and Guo, W}, title = {Tagging large CNV blocks in wheat boosts digitalization of germplasm resources by ultra-low-coverage sequencing.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {171}, pmid = {38951917}, issn = {1474-760X}, support = {2020YFE0202300//National Key Research and Development Program of China/ ; 32322059//National Natural Science Foundation of China/ ; PC2023B01016//Pinduoduo-China Agricultural University Research Fund/ ; BX20230414//National Postdoctoral Program for Innovative Talents/ ; }, mesh = {*Triticum/genetics ; *DNA Copy Number Variations ; Genome, Plant ; High-Throughput Nucleotide Sequencing ; Genetic Markers ; Alleles ; }, abstract = {BACKGROUND: The massive structural variations and frequent introgression highly contribute to the genetic diversity of wheat, while the huge and complex genome of polyploid wheat hinders efficient genotyping of abundant varieties towards accurate identification, management, and exploitation of germplasm resources.

RESULTS: We develop a novel workflow that identifies 1240 high-quality large copy number variation blocks (CNVb) in wheat at the pan-genome level, demonstrating that CNVb can serve as an ideal DNA fingerprinting marker for discriminating massive varieties, with the accuracy validated by PCR assay. We then construct a digitalized genotyping CNVb map across 1599 global wheat accessions. Key CNVb markers are linked with trait-associated introgressions, such as the 1RS·1BL translocation and 2N[v]S translocation, and the beneficial alleles, such as the end-use quality allele Glu-D1d (Dx5 + Dy10) and the semi-dwarf r-e-z allele. Furthermore, we demonstrate that these tagged CNVb markers promote a stable and cost-effective strategy for evaluating wheat germplasm resources with ultra-low-coverage sequencing data, competing with SNP array for applications such as evaluating new varieties, efficient management of collections in gene banks, and describing wheat germplasm resources in a digitalized manner. We also develop a user-friendly interactive platform, WheatCNVb (http://wheat.cau.edu.cn/WheatCNVb/), for exploring the CNVb profiles over ever-increasing wheat accessions, and also propose a QR-code-like representation of individual digital CNVb fingerprint. This platform also allows uploading new CNVb profiles for comparison with stored varieties.

CONCLUSIONS: The CNVb-based approach provides a low-cost and high-throughput genotyping strategy for enabling digitalized wheat germplasm management and modern breeding with precise and practical decision-making.}, } @article {pmid38951884, year = {2024}, author = {Lamkiewicz, K and Barf, LM and Sachse, K and Hölzer, M}, title = {RIBAP: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {170}, pmid = {38951884}, issn = {1474-760X}, support = {NFDI 28/1//Deutsche Forschungsgemeinschaft/ ; FZT 118//Deutsche Forschungsgemeinschaft/ ; SFB 1076/3 A06//Deutsche Forschungsgemeinschaft/ ; }, mesh = {*Genome, Bacterial ; *Molecular Sequence Annotation ; *Software ; Brucella/genetics/classification ; Bacteria/genetics/classification ; Chlamydia/genetics ; Enterococcus/genetics ; Klebsiella/genetics ; }, abstract = {Microbial pangenome analysis identifies present or absent genes in prokaryotic genomes. However, current tools are limited when analyzing species with higher sequence diversity or higher taxonomic orders such as genera or families. The Roary ILP Bacterial core Annotation Pipeline (RIBAP) uses an integer linear programming approach to refine gene clusters predicted by Roary for identifying core genes. RIBAP successfully handles the complexity and diversity of Chlamydia, Klebsiella, Brucella, and Enterococcus genomes, outperforming other established and recent pangenome tools for identifying all-encompassing core genes at the genus level. 
RIBAP is a freely available Nextflow pipeline at github.com/hoelzer-lab/ribap and zenodo.org/doi/10.5281/zenodo.10890871.}, } @article {pmid38950433, year = {2024}, author = {Rodriguez-Valera, F and Bellas, C}, title = {How Viruses Shape Microbial Plankton Microdiversity.}, journal = {Annual review of marine science}, volume = {}, number = {}, pages = {}, doi = {10.1146/annurev-marine-040623-090847}, pmid = {38950433}, issn = {1941-0611}, abstract = {One major conundrum of modern microbiology is the large pangenome (gene pool) present in microbes, which is much larger than those found in complex organisms such as humans. Here, we argue that this diversity of gene pools carried by different strains is maintained largely due to the control exercised by viral predation. Viruses maintain a high strain diversity through time that we describe as constant-diversity equilibrium, preventing the hoarding of resources by specific clones. Thus, viruses facilitate the release and degradation of dissolved organic matter in the ocean, which may lead to better ecosystem functioning by linking top-down to bottom-up control. By maintaining this equilibrium, viruses act as a key element of the adaptation of marine microbes to their environment and likely evolve as a single evolutionary unit.}, } @article {pmid38948819, year = {2024}, author = {Kantor, EJH and Robicheau, BM and Tolman, J and Archibald, JM and LaRoche, J}, title = {Targeted metagenomics reveals pangenomic diversity of the nitroplast (UCYN-A) and its algal host plastid.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.06.19.599377}, pmid = {38948819}, issn = {2692-8205}, abstract = {UCYN-A (Cand. Atelocyanobacterium thalassa) has recently been recognized as a globally-distributed, early stage, nitrogen-fixing organelle (the 'nitroplast') of cyanobacterial origin present in select species of haptophyte algae (e.g., Braarudosphaera bigelowii). 
Although the nitroplast was recognized as the UCYN-A2 sublineage, it is yet to be confirmed in other sublineages of the algal/UCYN-A complex. We used water samples collected from Halifax Harbour (Bedford Basin, Nova Scotia, Canada) and the offshore Scotian Shelf to further our understanding of B. bigelowii and UCYN-A in the coastal Northwest Atlantic. Sequencing data revealed UCYN-A-associated haptophyte signatures and yielded near-complete metagenome-assembled genomes (MAGs) for UCYN-A1, UCYN-A4, and the plastid of the A4-associated haptophyte. Comparative genomics provided new insights into the pangenome of UCYN-A. The UCYN-A4 MAG is the first genome sequenced from this sublineage and shares ~85% identity with the UCYN-A2 nitroplast. Genes missing in the reduced genome of the nitroplast were also missing in the A4 MAG supporting its likely classification as a nitroplast as well. The UCYN-A1 MAG was found to be nearly 100% identical to the reference genome despite coming from different ocean basins. Time-series data paired with the recurrence of specific microbes in enrichment cultures gave insight into the microbes that frequently co-occur with the algal/UCYN-A complex (e.g., Pelagibacter ubique). 
Overall, our study expands knowledge of UCYN-A and its host across major ocean basins and investigates their co-occurring microbes in the coastal Northwest Atlantic (NWA), thereby facilitating future studies on the underpinnings of haptophyte-associated diazotrophy in the sea.}, } @article {pmid38948139, year = {2024}, author = {Zhang, P and Zhang, B and Ji, YY and Jiao, J and Zhang, Z and Tian, CF}, title = {Cofitness network connectivity determines a fuzzy essential zone in open bacterial pangenome.}, journal = {mLife}, volume = {3}, number = {2}, pages = {277--290}, pmid = {38948139}, issn = {2770-100X}, abstract = {Most in silico evolutionary studies commonly assumed that core genes are essential for cellular function, while accessory genes are dispensable, particularly in nutrient-rich environments. However, this assumption is seldom tested genetically within the pangenome context. In this study, we conducted a robust pangenomic Tn-seq analysis of fitness genes in a nutrient-rich medium for Sinorhizobium strains with a canonical open pangenome. To evaluate the robustness of fitness category assignment, Tn-seq data for three independent mutant libraries per strain were analyzed by three methods, which indicates that the Hidden Markov Model (HMM)-based method is most robust to variations between mutant libraries and not sensitive to data size, outperforming the Bayesian and Monte Carlo simulation-based methods. Consequently, the HMM method was used to classify the fitness category. Fitness genes, categorized as essential (ES), advantage (GA), and disadvantage (GD) genes for growth, are enriched in core genes, while nonessential genes (NE) are over-represented in accessory genes. Accessory ES/GA genes showed a lower fitness effect than core ES/GA genes. Connectivity degrees in the cofitness network decrease in the order of ES, GD, and GA/NE. In addition to accessory genes, 1599 out of 3284 core genes display differential essentiality across test strains. 
Within the pangenome core, both shared quasi-essential (ES and GA) and strain-dependent fitness genes are enriched in similar functional categories. Our analysis demonstrates a considerable fuzzy essential zone determined by cofitness connectivity degrees in Sinorhizobium pangenome and highlights the power of the cofitness network in understanding the genetic basis of ever-increasing prokaryotic pangenome data.}, } @article {pmid38947078, year = {2024}, author = {Socarras, KM and Marino, MC and Earl, JP and Ehrlich, RL and Cramer, NA and Mell, JC and Sen, B and Ahmed, A and Marconi, RT and Ehrlich, GD}, title = {Characterization of the family-level Borreliaceae pan-genome and development of an episomal typing protocol.}, journal = {Research square}, volume = {}, number = {}, pages = {}, doi = {10.21203/rs.3.rs-4491589/v1}, pmid = {38947078}, abstract = {Background The Borreliaceae family includes many obligate parasitic bacterial species which are etiologically associated with a myriad of zoonotic borrelioses including Lyme disease and vector-borne relapsing fevers. Infections by the Borreliaceae are difficult to detect by both direct and indirect methods, often leading to delayed and missed diagnoses. Efforts to improve diagnoses center around the development of molecular diagnostics (MDx), but due to deep tissue sequestration of the causative spirochaetes and the lack of persistent bacteremias, even MDx assays suffer from a lack of sensitivity. Additionally, the highly extensive genomic heterogeneity among isolates, even within the same species, contributes to the lack of assay sensitivity as single target assays cannot provide universal coverage. This within-species heterogeneity is partly due to differences in replicon repertoires and genomic structures that have likely arisen to support the complex Borreliaceae lifecycle in which these parasites have to survive in multiple hosts each with unique immune responses. 
Results We constructed a Borreliaceae family-level pangenome and characterized the phylogenetic relationships among the constituent taxa which supports the recent taxonomy of splitting the family into at least two genera. Gene content profiles were created for the majority of the Borreliaceae replicons, providing for the first time their unambiguous molecular typing. Conclusion Our characterization of the Borreliaceae pan-genome supports the splitting of the former Borrelia genus into two genera and provides for the phylogenetic placement of several non-species designated isolates. Mining this family-level pangenome will enable precision diagnostics corresponding to gene content-driven clinical outcomes while also providing targets for interventions.}, } @article {pmid38943059, year = {2024}, author = {Eynard, SE and Klopp, C and Canale-Tabet, K and Marande, W and Vandecasteele, C and Roques, C and Donnadieu, C and Boone, Q and Servin, B and Vignal, A}, title = {The black honey bee genome: insights on specific structural elements and a first step towards pangenomes.}, journal = {Genetics, selection, evolution : GSE}, volume = {56}, number = {1}, pages = {51}, pmid = {38943059}, issn = {1297-9686}, mesh = {Bees/genetics ; Animals ; *Genome, Insect ; }, abstract = {BACKGROUND: The honey bee reference genome, HAv3.1, was produced from a commercial line sample that was thought to have a largely dominant Apis mellifera ligustica genetic background. Apis mellifera mellifera, often referred to as the black bee, has a separate evolutionary history and is the original type in western and northern Europe. Growing interest in this subspecies for conservation and non-professional apicultural practices, together with the necessity of deciphering genome backgrounds in hybrids, triggered the necessity for a specific genome assembly. Moreover, having several high-quality genomes is becoming key for taking structural variations into account in pangenome analyses.

RESULTS: Pacific Bioscience technology long reads were produced from a single haploid black bee drone. Scaffolding contigs into chromosomes was done using a high-density genetic map. This allowed for re-estimation of the recombination rate, which was over-estimated in some previous studies due to mis-assemblies, which resulted in spurious inversions in the older reference genomes. The sequence continuity obtained was very high and the only limit towards continuous chromosome-wide sequences seemed to be due to tandem repeat arrays that were usually longer than 10 kb and that belonged to two main families, the 371 and 91 bp repeats, causing problems in the assembly process due to high internal sequence similarity. Our assembly was used together with the reference genome to genotype two structural variants by a pangenome graph approach with Graphtyper2. Genotypes obtained were either correct or missing, when compared to an approach based on sequencing depth analysis, and genotyping rates were 89 and 76% for the two variants.

CONCLUSIONS: Our new assembly for the Apis mellifera mellifera honey bee subspecies demonstrates the utility of multiple high-quality genomes for the genotyping of structural variants, with a test case on two insertions and deletions. It will therefore be an invaluable resource for future studies, for instance by including structural variants in GWAS. Having used a single haploid drone for sequencing allowed a refined analysis of very large tandem repeat arrays, raising the question of their function in the genome. High quality genome assemblies for multiple subspecies such as presented here, are crucial for emerging projects using pangenomes.}, } @article {pmid38940135, year = {2024}, author = {Shivakumar, VS and Ahmed, OY and Kovaka, S and Zakeri, M and Langmead, B}, title = {Sigmoni: classification of nanopore signal with a compressed pangenome index.}, journal = {Bioinformatics (Oxford, England)}, volume = {40}, number = {Supplement_1}, pages = {i287-i296}, doi = {10.1093/bioinformatics/btae213}, pmid = {38940135}, issn = {1367-4811}, support = {//Advanced Research Computing at Hopkins/ ; OAC 1920103//National Science Foundation/ ; R01HG011392/HG/NHGRI NIH HHS/United States ; IIBR 2029552//National Science Foundation/ ; U01CA253481/NH/NIH HHS/United States ; RGP0025/2021//Human Frontier Science Program/ ; }, mesh = {Humans ; *Algorithms ; Nanopore Sequencing/methods ; Software ; Nanopores ; Genome, Human ; Genomics/methods ; Sequence Analysis, DNA/methods ; }, abstract = {SUMMARY: Improvements in nanopore sequencing necessitate efficient classification methods, including pre-filtering and adaptive sampling algorithms that enrich for reads of interest. Signal-based approaches circumvent the computational bottleneck of basecalling. But past methods for signal-based classification do not scale efficiently to large, repetitive references like pangenomes, limiting their utility to partial references or individual genomes. 
We introduce Sigmoni: a rapid, multiclass classification method based on the r-index that scales to references of hundreds of Gbps. Sigmoni quantizes nanopore signal into a discrete alphabet of picoamp ranges. It performs rapid, approximate matching using matching statistics, classifying reads based on distributions of picoamp matching statistics and co-linearity statistics, all in linear query time without the need for seed-chain-extend. Sigmoni is 10-100× faster than previous methods for adaptive sampling in host depletion experiments with improved accuracy, and can query reads against large microbial or human pangenomes. Sigmoni is the first signal-based tool to scale to a complete human genome and pangenome while remaining fast enough for adaptive sampling applications.

Sigmoni is implemented in Python, and is available open-source at https://github.com/vshiv18/sigmoni.}, } @article {pmid38934790, year = {2024}, author = {Cohen, ZP and Perkin, LC and Wagner, TA and Liu, J and Bell, AA and Arick, II, MA and Grover, CE and Yu, JZ and Udall, JA and Suh, CP}, title = {Nematode-resistance loci in Upland cotton genomes are associated with structural differences.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkae140}, pmid = {38934790}, issn = {2160-1836}, abstract = {Reniform and root-knot nematode are two of the most destructive pests of conventional upland cotton, Gossypium hirsutum, L. and continue to be a major threat to cotton fiber production in semi-arid regions of the southern United States and Central America. Fortunately, naturally occurring tolerance to these nematodes has been identified in the Pima cotton species (G. barbadense) and several upland cotton varieties (G. hirsutum), which has led to a robust breeding program that has successfully introgressed and stacked these independent resistant traits into several upland cotton lineages with superior agronomic traits, e.g. BAR 32-30 and BARBREN-713. This work identifies the genomic variations of these nematode tolerant accessions by comparing their respective genomes to the susceptible, high-quality fiber producing parental line of this lineage: Phytogen 355 (PSC355). We discover several large genomic differences within marker regions that harbor putative resistance genes as well as expression mechanisms shared by the two resistant lines, with respect to the susceptible PSC355 parental line. This work emphasizes the utility of whole genome comparisons as a means of elucidating large and small nuclear differences by lineage and phenotype. 
}, } @article {pmid38934646, year = {2024}, author = {Raghuram, V and Petit, RA and Karol, Z and Mehta, R and Weissman, DB and Read, TD}, title = {Average nucleotide identity-based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0014324}, doi = {10.1128/msystems.00143-24}, pmid = {38934646}, issn = {2379-5077}, abstract = {UNLABELLED: Staphylococcus aureus causes both hospital- and community-acquired infections in humans worldwide. Due to the high incidence of infection, S. aureus is also one of the most sampled and sequenced pathogens today, providing an outstanding resource to understand variation at the bacterial subspecies level. We processed and downsampled 83,383 public S. aureus Illumina whole-genome shotgun sequences and 1,263 complete genomes to produce 7,954 representative substrains. Pairwise comparison of average nucleotide identity revealed a natural boundary of 99.5% that could be used to define 145 distinct strains within the species. We found that intermediate frequency genes in the pangenome (present in 10%-95% of genomes) could be divided into those closely linked to strain background ("strain-concentrated") and those highly variable within strains ("strain-diffuse"). Non-core genes had different patterns of chromosome location. Notably, strain-diffuse genes were associated with prophages; strain-concentrated genes were associated with the vSaβ genome island and rare genes (<10% frequency) concentrated near the origin of replication. Antibiotic resistance genes were enriched in the strain-diffuse class, while virulence genes were distributed between strain-diffuse, strain-concentrated, core, and rare classes. This study shows how different patterns of gene movement help create strains as distinct subspecies entities and provide insight into the diverse histories of important S. aureus functions.

IMPORTANCE: We analyzed the genomic diversity of Staphylococcus aureus, a globally prevalent bacterial species that causes serious infections in humans. Our goal was to build a genetic picture of the different strains of S. aureus and which genes may be associated with them. We reprocessed >84,000 genomes and subsampled to remove redundancy. We found that individual samples sharing >99.5% of their genome could be grouped into strains. We also showed that a portion of genes that are present in intermediate frequency in the species are strongly associated with some strains but completely absent from others, suggesting a role in strain specificity. This work lays the foundation for understanding individual gene histories of the S. aureus species and also outlines strategies for processing large bacterial genomic data sets.}, } @article {pmid38934605, year = {2024}, author = {Burcham, ZM}, title = {Comparative genomic analysis of an emerging Pseudomonadaceae member, Thiopseudomonas alkaliphila.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0415723}, doi = {10.1128/spectrum.04157-23}, pmid = {38934605}, issn = {2165-0497}, abstract = {Thiopseudomonas alkaliphila, an organism recently classified within the Pseudomonadaceae family, has been detected in diverse sources such as human tissues, animal guts, industrial fermenters, and decomposition environments, suggesting a diverse ecological role. However, a large knowledge gap exists in how T. alkaliphila functions. In this comparative genomic analysis, adaptations indicative of habitat specificity among strains and genomic similarity to known opportunistic pathogens are revealed. Genomic investigation reveals a core metabolic utilization of multiple oxidative and non-oxidative catabolic pathways, suggesting adaptability to varied environments and carbon sources. The genomic repertoire of T. 
alkaliphila includes secondary metabolites, such as antimicrobials and siderophores, indicative of its involvement in microbial competition and resource acquisition. Additionally, the presence of transposases, prophages, plasmids, and Clustered Regularly Interspaced Short Palindromic Repeats-Cas systems in T. alkaliphila genomes suggests mechanisms for horizontal gene transfer and defense against viral predation. This comprehensive genomic analysis expands our understanding on the ecological functions, community interactions, and potential virulence of T. alkaliphila, while emphasizing its adaptability and diverse capabilities across environmental and host-associated ecosystems. IMPORTANCE: As the microbial world continues to be explored, new organisms will emerge with beneficial and/or pathogenetic impact. Thiopseudomonas alkaliphila is a species originally isolated from clinical human tissue and fluid samples but has not been attributed to disease. Since its classification, T. alkaliphila has been found in animal guts, animal waste, decomposing remains, and biogas fermentation reactors. This is the first study to provide an in-depth view of the metabolic potential of publicly available genomes belonging to this species through a comparative genomics and draft pangenome calculation approach. It was found that T. alkaliphila is metabolically versatile and likely adapts to diverse energy sources and environments, which may make it useful for bioremediation and in industrial settings. A range of virulence factors and antibiotic resistances were also detected, suggesting T. 
alkaliphila may operate as an undescribed opportunistic pathogen.}, } @article {pmid38934546, year = {2024}, author = {Oles, RE and Carrillo Terrazas, M and Loomis, LR and Hsu, C-Y and Tribelhorn, C and Belda-Ferre, P and Ea, AC and Bryant, M and Young, JA and Carrow, HC and Sandborn, WJ and Dulai, PS and Sivagnanam, M and Pride, D and Knight, R and Chu, H}, title = {Pangenome comparison of Bacteroides fragilis genomospecies unveils genetic diversity and ecological insights.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0051624}, doi = {10.1128/msystems.00516-24}, pmid = {38934546}, issn = {2379-5077}, abstract = {UNLABELLED: Bacteroides fragilis is a Gram-negative commensal bacterium commonly found in the human colon, which differentiates into two genomospecies termed divisions I and II. Through a comprehensive collection of 694 B. fragilis whole genome sequences, we identify novel features distinguishing these divisions. Our study reveals a distinct geographic distribution with division I strains predominantly found in North America and division II strains in Asia. Additionally, division II strains are more frequently associated with bloodstream infections, suggesting a distinct pathogenic potential. We report differences between the two divisions in gene abundance related to metabolism, virulence, stress response, and colonization strategies. Notably, division II strains harbor more antimicrobial resistance (AMR) genes than division I strains. These findings offer new insights into the functional roles of division I and II strains, indicating specialized niches within the intestine and potential pathogenic roles in extraintestinal sites.

IMPORTANCE: Understanding the distinct functions of microbial species in the gut microbiome is crucial for deciphering their impact on human health. Classifying division II strains as Bacteroides fragilis can lead to erroneous associations, as researchers may mistakenly attribute characteristics observed in division II strains to the more extensively studied division I B. fragilis. Our findings underscore the necessity of recognizing these divisions as separate species with distinct functions. We unveil new findings of differential gene prevalence between division I and II strains in genes associated with intestinal colonization and survival strategies, potentially influencing their role as gut commensals and their pathogenicity in extraintestinal sites. Despite the significant niche overlap and colonization patterns between these groups, our study highlights the complex dynamics that govern strain distribution and behavior, emphasizing the need for a nuanced understanding of these microorganisms.}, } @article {pmid38931094, year = {2024}, author = {Mather, D and Vassos, E and Sheedy, J and Guo, W and McKay, A}, title = {A Quantitative Trait Locus with a Major Effect on Root-Lesion Nematode Resistance in Barley.}, journal = {Plants (Basel, Switzerland)}, volume = {13}, number = {12}, pages = {}, doi = {10.3390/plants13121663}, pmid = {38931094}, issn = {2223-7747}, support = {DAS00141, UA00143, USQ00019 USQ1702-007RTSX//Grains Research and Development Corporation/ ; }, abstract = {Although the root-lesion nematode Pratylenchus thornei is known to affect barley (Hordeum vulgare L.), there have been no reports on the genetic control of P. thornei resistance in barley. In this research, P. thornei resistance was assessed for a panel of 46 barley mapping parents and for two mapping populations (Arapiles/Franklin and Denar/Baudin). With both populations, a highly significant quantitative trait locus (QTL) was mapped at the same position on the long arm of chromosome 7H. 
Single-nucleotide polymorphisms (SNPs) in this region were anchored to an RGT Planet pan-genome assembly and assayed on the mapping parents and other barley varieties. The results indicate that Arapiles, Denar, RGT Planet and several other varieties likely have the same resistance gene on chromosome 7H. Marker assays reported here could be used to select for P. thornei resistance in barley breeding. Analysis of existing barley pan-genomic and pan-transcriptomic data provided a list of candidate genes along with information on the expression and differential expression of some of those genes in barley root tissue. Further research is required to identify a specific barley gene that affects root-lesion nematode resistance.}, } @article {pmid38926873, year = {2024}, author = {Sierra, P and Durbin, R}, title = {Identification of transposable element families from pangenome polymorphisms.}, journal = {Mobile DNA}, volume = {15}, number = {1}, pages = {13}, pmid = {38926873}, issn = {1759-8753}, support = {956229//Horizon 2020 Framework Programme/ ; 207492/WT_/Wellcome Trust/United Kingdom ; }, abstract = {BACKGROUND: Transposable Elements (TEs) are segments of DNA, typically a few hundred base pairs up to several tens of thousands bases long, that have the ability to generate new copies of themselves in the genome. Most existing methods used to identify TEs in a newly sequenced genome are based on their repetitive character, together with detection based on homology and structural features. As new high quality assemblies become more common, including the availability of multiple independent assemblies from the same species, an alternative strategy for identification of TE families becomes possible in which we focus on the polymorphism at insertion sites caused by TE mobility.

RESULTS: We develop the idea of using the structural polymorphisms found in pangenomes to create a library of the TE families recently active in a species, or in a closely related group of species. We present a tool, pantera, that achieves this task, and illustrate its use both on species with well-curated libraries, and on new assemblies.

CONCLUSIONS: Our results show that pantera is sensitive and accurate, tending to correctly identify complete elements with precise boundaries, and is particularly well suited to detect larger, low copy number TEs that are often undetected with existing de novo methods.}, } @article {pmid38921378, year = {2024}, author = {Casimiro-Ramos, A and Bautista-Crescencio, C and Vidal-Montiel, A and González, GM and Hernández-García, JA and Hernández-Rodríguez, C and Villa-Tanaca, L}, title = {Comparative Genomics of the First Resistant Candida auris Strain Isolated in Mexico: Phylogenomic and Pan-Genomic Analysis and Mutations Associated with Antifungal Resistance.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {10}, number = {6}, pages = {}, doi = {10.3390/jof10060392}, pmid = {38921378}, issn = {2309-608X}, support = {SIP 20240946//Instituto Politécnico Nacional/ ; SIP 20231481//Instituto Politécnico Nacional/ ; }, abstract = {Candida auris is an emerging multidrug-resistant and opportunistic pathogenic yeast. Whole-genome sequencing analysis has defined five major clades, each from a distinct geographic region. The current study aimed to examine the genome of the C. auris 20-1498 strain, which is the first isolate of this fungus identified in Mexico. Based on whole-genome sequencing, the draft genome was found to contain 70 contigs. It had a total genome size of 12.86 Mbp, an N50 value of 1.6 Mbp, and an average guanine-cytosine (GC) content of 45.5%. Genome annotation revealed a total of 5432 genes encoding 5515 proteins. According to the genomic analysis, the C. auris 20-1498 strain belongs to clade IV (containing strains endemic to South America). Of the two genes (ERG11 and FKS1) associated with drug resistance in C. auris, a mutation was detected in K143R, a gene located in a mutation hotspot of ERG11 (lanosterol 14-α-demethylase), an antifungal drug target. 
The focus on whole-genome sequencing and the identification of mutations linked to the drug resistance of fungi could lead to the discovery of new therapeutic targets and new antifungal compounds.}, } @article {pmid38920366, year = {2024}, author = {Ardalani, O and Phaneuf, PV and Mohite, OS and Nielsen, LK and Palsson, BO}, title = {Pangenome reconstruction of Lactobacillaceae metabolism predicts species-specific metabolic traits.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0015624}, doi = {10.1128/msystems.00156-24}, pmid = {38920366}, issn = {2379-5077}, abstract = {Strains across the Lactobacillaceae family form the basis for a trillion-dollar industry. Our understanding of the genomic basis for their key traits is fragmented, however, including the metabolism that is foundational to their industrial uses. Pangenome analysis of publicly available Lactobacillaceae genomes allowed us to generate genome-scale metabolic network reconstructions for 26 species of industrial importance. Their manual curation led to more than 75,000 gene-protein-reaction associations that were deployed to generate 2,446 genome-scale metabolic models. Cross-referencing genomes and known metabolic traits allowed for manual metabolic network curation and validation of the metabolic models. As a result, we provide the first pangenomic basis for metabolism in the Lactobacillaceae family and a collection of predictive computational metabolic models that enable a variety of practical uses. IMPORTANCE: Lactobacillaceae, a bacterial family foundational to a trillion-dollar industry, is increasingly relevant to biosustainability initiatives. Our study, leveraging approximately 2,400 genome sequences, provides a pangenomic analysis of Lactobacillaceae metabolism, creating over 2,400 curated and validated genome-scale models (GEMs). 
These GEMs successfully predict (i) unique, species-specific metabolic reactions; (ii) niche-enriched reactions that increase organism fitness; (iii) essential media components, offering insights into the global amino acid essentiality of Lactobacillaceae; and (iv) fermentation capabilities across the family, shedding light on the metabolic basis of Lactobacillaceae-based commercial products. This quantitative understanding of Lactobacillaceae metabolic properties and their genomic basis will have profound implications for the food industry and biosustainability, offering new insights and tools for strain selection and manipulation.}, } @article {pmid38919498, year = {2024}, author = {Zambounis, A and Boutsika, A and Gray, N and Hossain, M and Chatzidimopoulos, M and Tsitsigiannis, DI and Paplomatas, E and Hane, J}, title = {Pan-genome survey of Septoria pistaciarum, causal agent of Septoria leaf spot of pistachios, across three Aegean sub-regions of Greece.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1396760}, pmid = {38919498}, issn = {1664-302X}, abstract = {Septoria pistaciarum, a causal agent of Septoria leaf spot disease of pistachio, is a fungal pathogen that causes substantial losses in the cultivation, worldwide. This study describes the first pan-genome-based survey of this phytopathogen-comprising a total of 27 isolates, with 9 isolates each from 3 regional units of Greece (Pieria, Larissa and Fthiotida). The reference isolate (SPF8) assembled into a total of 43.1 Mb, with 38.6% contained within AT-rich regions of approximately 37.5% G:C. The genomes of the 27 isolates exhibited on average 42% gene-coding and 20% repetitive regions. The genomes of isolates from the southern Fthiotida region appeared to more diverged from each other than the other regions based on SNP-derived trees, and also contained isolates similar to both the Pieria and Larissa regions. 
In contrast, isolates of the Pieria and Larissa were less diverse and distinct from one another. Asexual reproduction appeared to be typical, with no MAT1-2 locus detected in any isolate. Genome-based prediction of infection mode indicated hemibiotrophic and saprotrophic adaptations, consistent with its long latent phase. Gene prediction and orthology clustering generated a pan-genome-wide gene set of 21,174 loci. A total of 59 ortholog groups were predicted to contain candidate effector proteins, with 36 (61%) of these either having homologs to known effectors from other species or could be assigned predicted functions from matches to conserved domains. Overall, effector prediction suggests that S. pistaciarum employs a combination of defensive effectors with roles in suppression of host defenses, and offensive effectors with a range of cytotoxic activities. Some effector-like ortholog groups presented as divergent versions of the same protein, suggesting region-specific adaptations may have occurred. These findings provide insights and future research directions in uncovering the pathogenesis and population dynamics of S. 
pistaciarum toward the efficient management of Septoria leaf spot of pistachio.}, } @article {pmid38918389, year = {2024}, author = {Hämälä, T and Moore, C and Cowan, L and Carlile, M and Gopaulchan, D and Brandrud, MK and Birkeland, S and Loose, M and Kolář, F and Koch, MA and Yant, L}, title = {Impact of whole-genome duplications on structural variant evolution in Cochlearia.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {5377}, pmid = {38918389}, issn = {2041-1723}, support = {679056//EC | EU Framework Programme for Research and Innovation H2020 | H2020 Excellent Science (H2020 Priority Excellent Science)/ ; 850852//EC | EU Framework Programme for Research and Innovation H2020 | H2020 Excellent Science (H2020 Priority Excellent Science)/ ; RPG-2020-367//Leverhulme Trust/ ; }, mesh = {*Polyploidy ; *Genome, Plant/genetics ; *Evolution, Molecular ; *Gene Duplication ; Genomic Structural Variation/genetics ; Mutation ; }, abstract = {Polyploidy, the result of whole-genome duplication (WGD), is a major driver of eukaryote evolution. Yet WGDs are hugely disruptive mutations, and we still lack a clear understanding of their fitness consequences. Here, we study whether WGDs result in greater diversity of genomic structural variants (SVs) and how they influence evolutionary dynamics in a plant genus, Cochlearia (Brassicaceae). By using long-read sequencing and a graph-based pangenome, we find both negative and positive interactions between WGDs and SVs. Masking of recessive mutations due to WGDs leads to a progressive accumulation of deleterious SVs across four ploidal levels (from diploids to octoploids), likely reducing the adaptive potential of polyploid populations. However, we also discover putative benefits arising from SV accumulation, as more ploidy-specific SVs harbor signals of local adaptation in polyploids than in diploids. 
Together, our results suggest that SVs play diverse and contrasting roles in the evolutionary trajectories of young polyploids.}, } @article {pmid38916318, year = {2024}, author = {Lypaczewski, P and Chac, D and Dunmire, CN and Tandoc, KM and Chowdhury, F and Khan, AI and Bhuiyan, TR and Harris, JB and LaRocque, RC and Calderwood, SB and Ryan, ET and Qadri, F and Shapiro, BJ and Weil, AA}, title = {Vibrio cholerae O1 experiences mild bottlenecks through the gastrointestinal tract in some but not all cholera patients.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0078524}, doi = {10.1128/spectrum.00785-24}, pmid = {38916318}, issn = {2165-0497}, abstract = {UNLABELLED: Vibrio cholerae O1 causes the diarrheal disease cholera, and the small intestine is the site of active infection. During cholera, cholera toxin is secreted from V. cholerae and induces a massive fluid influx into the small intestine, which causes vomiting and diarrhea. Typically, V. cholerae genomes are sequenced from bacteria passed in stool, but rarely from vomit, a fluid that may more closely represents the site of active infection. We hypothesized that V. cholerae O1 population bottlenecks along the gastrointestinal tract would result in reduced genetic variation in stool compared to vomit. To test this, we sequenced V. cholerae genomes from 10 cholera patients with paired vomit and stool samples. Genetic diversity was low in both vomit and stool, consistent with a single infecting population rather than coinfection with divergent V. cholerae O1 lineages. The amount of single-nucleotide variation decreased from vomit to stool in four patients, increased in two, and remained unchanged in four. The variation in gene presence/absence decreased between vomit and stool in eight patients and increased in two. 
Pangenome analysis of assembled short-read sequencing demonstrated that the toxin-coregulated pilus operon more frequently contained deletions in genomes from vomit compared to stool. However, these deletions were not detected by PCR or long-read sequencing, indicating that interpreting gene presence or absence patterns from short-read data alone may be incomplete. Overall, we found that V. cholerae O1 isolated from stool is genetically