Source code for pyleotups.utils.PaleoData

__all__ = ['PaleoData']

import numpy as np

class PaleoData:
    """
    Represents paleo data associated with a site, including multiple data
    files and full variable metadata per file.

    Attributes
    ----------
    datatable_id : str
        Unique NOAA data table identifier (``NOAADataTableId``), or
        ``np.nan`` when absent.
    dataTableName : str
        Name of the data table, or ``np.nan`` when absent.
    timeUnit : str
        Time unit used in the data table, or ``np.nan`` when absent.
    study_id
        Identifier of the parent study, stored as given.
    site_id
        Identifier of the parent site, stored as given.
    files : list of dict
        List of raw file info dicts (only entries with a non-empty string
        ``fileUrl`` are kept).
    file_variable_map : dict
        Maps the whitespace-stripped fileUrl to a dict of variables and
        their full metadata.
    file_url : str or np.nan
        Shortcut to the first file URL, whitespace-stripped so that it is a
        valid key of ``file_variable_map`` (for backward compatibility).
    variables : list of str
        Shortcut to variable names in first file (for backward
        compatibility).
    """

    # Metadata fields copied from every raw variable dict, in output order.
    _VARIABLE_FIELDS = (
        "cvDataType",
        "cvWhat",
        "cvMaterial",
        "cvError",
        "cvUnit",
        "cvSeasonality",
        "cvDetail",
        "cvMethod",
        "cvAdditionalInfo",
        "cvFormat",
        "cvShortName",
    )

    def __init__(self, paleo_data, study_id, site_id):
        """
        Build the object from one raw paleo-data dictionary.

        Parameters
        ----------
        paleo_data : dict
            Raw data-table dictionary. The keys read are
            ``NOAADataTableId``, ``dataTableName``, ``timeUnit`` and
            ``dataFile``.
        study_id
            Identifier of the parent study.
        site_id
            Identifier of the parent site.
        """
        self.datatable_id = paleo_data.get('NOAADataTableId', np.nan)
        self.dataTableName = paleo_data.get('dataTableName', np.nan)
        self.timeUnit = paleo_data.get('timeUnit', np.nan)
        self.study_id = study_id
        self.site_id = site_id

        self.files = []
        self.file_variable_map = {}

        # `or []` tolerates an explicit None for 'dataFile'.
        for file_info in paleo_data.get('dataFile') or []:
            if not isinstance(file_info, dict):
                continue
            file_url = self._normalize_url(file_info.get('fileUrl'))
            if not isinstance(file_url, str) or not file_url:
                continue

            self.files.append(file_info)
            self.file_variable_map[file_url] = self._extract_variables(file_info)

        # Compatibility for older uses
        if self.files:
            # Use the normalized URL: it is the key under which the
            # variables were stored, so the lookup below cannot miss
            # because of surrounding whitespace.
            self.file_url = self._normalize_url(
                self.files[0].get('fileUrl', np.nan)
            )
            # Use resolved names only
            self.variables = list(self.file_variable_map.get(self.file_url, {}))
        else:
            self.file_url = np.nan
            self.variables = []

    @staticmethod
    def _normalize_url(value):
        """Return ``value`` stripped of whitespace if it is a string, else unchanged."""
        return value.strip() if isinstance(value, str) else value

    @staticmethod
    def _resolve_base_name(var, index):
        """Pick a variable name: cvShortName, else last cvWhat segment, else ``Var<index>``."""
        short_name = var.get("cvShortName")
        if isinstance(short_name, str) and short_name.strip():
            return short_name.strip()

        what = var.get("cvWhat")
        if isinstance(what, str) and ">" in what:
            return what.split(">")[-1].strip()

        return f"Var{index}"

    @classmethod
    def _extract_variables(cls, file_info):
        """Map unique variable names to their metadata dicts for one file entry."""
        variables_meta = {}
        used_names = {}

        # `or []` tolerates an explicit None for 'variables'.
        for i, var in enumerate(file_info.get('variables') or [], start=1):
            if not isinstance(var, dict):
                continue

            base_name = cls._resolve_base_name(var, i)

            # Ensure unique variable name: start from the per-base counter,
            # then keep advancing while the candidate is already taken (a
            # generated "x_1" may clash with a variable literally named
            # "x_1"), so no entry is ever silently overwritten.
            count = used_names.get(base_name, 0)
            var_name = base_name if count == 0 else f"{base_name}_{count}"
            while var_name in variables_meta:
                count += 1
                var_name = f"{base_name}_{count}"
            used_names[base_name] = count + 1

            # Store full variable metadata dictionary
            variables_meta[var_name] = {
                field: var.get(field) for field in cls._VARIABLE_FIELDS
            }

        return variables_meta

    def to_dict(self, file_obj=None):
        """
        Convert PaleoData into a dictionary, optionally for a specific file.

        Parameters
        ----------
        file_obj : dict, optional
            Specific file object (default is first file).

        Returns
        -------
        dict
            Dictionary of core metadata for one file. ``FileURL`` is the
            whitespace-stripped URL (``np.nan`` when the file has none) and
            ``Variables`` lists the resolved variable names stored for it.
        """
        selected_file = file_obj if file_obj else (self.files[0] if self.files else {})
        # Normalize so the lookup key matches the keys of file_variable_map.
        file_url = self._normalize_url(selected_file.get("fileUrl", np.nan))

        return {
            "DataTableID": self.datatable_id,
            "DataTableName": self.dataTableName,
            "TimeUnit": self.timeUnit,
            "FileURL": file_url,
            "Variables": list(self.file_variable_map.get(file_url, {})),
            "FileDescription": selected_file.get("urlDescription", np.nan),
            "TotalFilesAvailable": len(self.files),
        }