Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions weatherbenchX/data_loaders/sparse_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
# limitations under the License.
"""Data loaders for tabular data stored in Parquet format."""

from typing import Callable, Hashable, Mapping, Optional, Sequence, Union
import functools
import os
from typing import Callable, Hashable, Mapping, Optional, Sequence, Union
import numpy as np
import pandas as pd
import pyarrow
Expand Down Expand Up @@ -54,9 +55,7 @@ def parquet_filename_for_time(path: str, time: np.datetime64, unit: str) -> str:
year = time.item().year
month = time.item().month
if unit == 'M':
fn = (
f'year={year}/month={month}/{year}-{str(month).zfill(2)}.parquet'
)
fn = f'year={year}/month={month}/{year}-{str(month).zfill(2)}.parquet'
elif unit == 'D':
day = time.item().day
fn = f'year={year}/month={month}/day={day}/{year}-{str(month).zfill(2)}-{str(day).zfill(2)}.parquet'
Expand Down Expand Up @@ -472,10 +471,17 @@ def __init__(
remove_duplicates: bool = False,
pick_closest_duplicate_by: Optional[str] = None,
file_tolerance: np.timedelta64 = np.timedelta64(1, 'h'),
apply_fn: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
preprocessing_fn: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
interpolation: Optional[interpolations.Interpolation] = None,
):
def metar_preprocessing_fn(df: pd.DataFrame):
def metar_preprocessing_fn(
df: pd.DataFrame,
preprocessing_fn: Optional[
Callable[[pd.DataFrame], pd.DataFrame]
] = None,
):
if preprocessing_fn is not None:
df = preprocessing_fn(df)
df = set_bad_quality_to_nan(
df,
# Rename to raw variables since this happens before renaming.
Expand Down Expand Up @@ -506,6 +512,8 @@ def metar_preprocessing_fn(df: pd.DataFrame):
remove_duplicates=remove_duplicates,
pick_closest_duplicate_by=pick_closest_duplicate_by,
file_tolerance=file_tolerance,
preprocessing_fn=metar_preprocessing_fn,
preprocessing_fn=functools.partial(
metar_preprocessing_fn, preprocessing_fn=preprocessing_fn
),
interpolation=interpolation,
)
Loading