Source code for pyleotups.utils.Publication

__all__ = ['Publication']

import re
import numpy as np

class Publication:
    """
    Represents a publication within a study.

    Attributes
    ----------
    author : str
        The name of the author(s) of the publication
        ('Unknown Author' when no author mapping is supplied).
    title : str
        The title of the publication ('Unknown Title' when missing).
    journal : str
        The journal where the publication appeared
        ('Unknown Journal' when missing).
    year : str or int
        The publication year as supplied under 'pubYear'
        ('Unknown Year' when missing).
    citation : str
        The citation text supplied under 'citation' ('' when missing).
    volume : str or float
        The volume number; ``np.nan`` when missing.
    number : str or float
        The issue number (read from 'issue'); ``np.nan`` when missing.
    pages : str or float
        The page numbers; ``np.nan`` when missing.
    pub_type : str or float
        The type of publication (read from 'type'); ``np.nan`` when missing.
    doi : str or float
        The identifier read from ``identifier['id']``; ``np.nan`` when missing.
    url : str or float
        The URL read from ``identifier['url']``; ``np.nan`` when missing.
    study_id : str or None
        The study ID to which this publication belongs. Initialized to
        ``None``; expected to be assigned after construction.
    """

    def __init__(self, pub_data):
        """
        Initialize a Publication instance.

        Parameters
        ----------
        pub_data : dict
            Dictionary containing publication data. Keys read are 'author'
            (a mapping with a 'name' entry), 'title', 'journal', 'pubYear',
            'citation', 'volume', 'issue', 'pages', 'type' and 'identifier'
            (a mapping with 'id' and 'url' entries).
        """
        author_data = pub_data.get('author')
        self.author = (
            author_data.get('name')
            if isinstance(author_data, dict)
            else 'Unknown Author'
        )
        self.title = pub_data.get('title') or 'Unknown Title'
        self.journal = pub_data.get('journal') or 'Unknown Journal'
        self.year = pub_data.get('pubYear') or 'Unknown Year'
        self.citation = pub_data.get('citation') or ''
        self.volume = pub_data.get('volume') or np.nan
        self.number = pub_data.get('issue') or np.nan
        self.pages = pub_data.get('pages') or np.nan
        self.pub_type = pub_data.get('type') or np.nan

        # Guard against a missing or non-mapping identifier (e.g. a bare
        # string), mirroring the isinstance check applied to 'author' above.
        identifier_info = pub_data.get('identifier')
        if not isinstance(identifier_info, dict):
            identifier_info = {}
        self.doi = identifier_info.get('id', np.nan)
        self.url = identifier_info.get('url', np.nan)

        self.study_id = None

    def get_citation_key(self):
        """
        Generate a citation key for the publication.

        Returns
        -------
        str
            A key in the format
            "<LastToken>_<FirstSignificantWord>_<Year>_<StudyID>" with all
            spaces removed. <LastToken> is the last whitespace-separated
            token of the author string; <FirstSignificantWord> is the first
            title word longer than two characters that is not "the".
            Placeholders ("UnknownAuthor", "Unknown", "UnknownYear",
            "UnknownID") are used for parts that are unavailable.
        """
        if isinstance(self.author, str) and self.author.strip():
            last_name = self.author.strip().split()[-1]
        else:
            last_name = "UnknownAuthor"

        # Ensure `title` is a string before applying the regex.
        title = self.title if isinstance(self.title, str) else "UnknownTitle"
        words = re.findall(r'\w+', title)
        first_significant_word = next(
            (
                word.capitalize()
                for word in words
                if len(word) > 2 and word.lower() != "the"
            ),
            "Unknown",
        )

        year = str(self.year) if self.year else "UnknownYear"
        study_id = str(self.study_id) if self.study_id else "UnknownID"

        key = f"{last_name}_{first_significant_word}_{year}_{study_id}"
        return key.replace(" ", "")

    @staticmethod
    def _is_missing(value):
        """Return True for None, empty/falsy values, and float NaN."""
        # NaN is the only float that is not equal to itself; it is truthy,
        # so a plain truthiness test would not catch it.
        if isinstance(value, float) and value != value:
            return True
        return not value

    def _bibtex_fields(self):
        """Return the BibTeX field mapping with missing values removed."""
        fields = {
            "author": self.author,
            "title": self.title,
            "journal": self.journal,
            "year": str(self.year) if self.year else "Unknown",
            "doi": self.doi,
            "url": self.url,
        }
        return {
            name: value
            for name, value in fields.items()
            if not self._is_missing(value)
        }

    def to_bibtex_entry(self):
        """
        Convert the publication into a pybtex ``Entry`` of type "article".

        Returns
        -------
        pybtex.database.Entry
            An entry carrying the author, title, journal, year, doi and url
            fields; fields whose value is missing (None, empty or NaN) are
            omitted.
        """
        # Imported lazily so the class can be used without pybtex installed.
        from pybtex.database import Entry

        return Entry("article", fields=self._bibtex_fields())

    def to_dict(self):
        """
        Convert the publication data into a dictionary.

        Returns
        -------
        dict
            A dictionary representation of the publication. "CitationKey"
            is ``None`` unless ``study_id`` has been set.
        """
        return {
            "Author": self.author,
            "Title": self.title,
            "Journal": self.journal,
            "Year": self.year,
            "Volume": self.volume,
            "Number": self.number,
            "Pages": self.pages,
            "Type": self.pub_type,
            "DOI": self.doi,
            "URL": self.url,
            "CitationKey": self.get_citation_key() if self.study_id else None,
        }