Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
396 changes: 396 additions & 0 deletions src/Plugin/search_api/processor/EDTFDateProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,396 @@
<?php

namespace Drupal\controlled_access_terms\Plugin\search_api\processor;

use Drupal\search_api\Processor\ProcessorPluginBase;
use Drupal\search_api\Processor\ProcessorProperty;
use Drupal\search_api\Datasource\DatasourceInterface;
use Drupal\search_api\Item\ItemInterface;
use Drupal\search_api\Plugin\PluginFormTrait;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;

/**
* Provides a Search API processor for indexing EDTF dates in Solr.
*
* @SearchApiProcessor(
* id = "edtf_date_processor",
* label = @Translation("EDTF Date Processor"),
* description = @Translation("Indexes EDTF dates (single or multiple) as Solr Date Types."),
* stages = {
* "add_properties" = 0,
* }
* )
*/
class EDTFDateProcessor extends ProcessorPluginBase implements PluginFormInterface {
use PluginFormTrait;
use StringTranslationTrait;

/**
* Stores plugin configuration.
*
* @var array
*/
protected $configuration;

/**
 * {@inheritdoc}
 *
 * Defaults: no EDTF fields selected, no lower year bound (0), no upper year
 * bound (empty string), and both "ignore open interval" flags switched off.
 */
public function defaultConfiguration() {
  return [
    'fields' => [],
    'open_start_year' => 0,
    'open_end_year' => '',
    // These two flags are written by submitConfigurationForm() and read by
    // addFieldValues(); declaring them here guarantees the keys exist even
    // when the configuration form has never been saved.
    'ignore_open_start' => FALSE,
    'ignore_open_end' => FALSE,
  ];
}

/**
 * {@inheritdoc}
 *
 * Builds three elements: a multi-select of every field_config entity whose
 * field type is "edtf" (keyed as "<entity_type>|<field_name>"), plus the two
 * numeric year bounds used for open intervals.
 */
public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
  // The supported-format list must be closed with </ul>; an opening <ul> at
  // the end leaves the markup unbalanced.
  $form['#description'] = $this->t('Select the EDTF fields to extract dates from.
<br>Note: The following EDTF date formats are supported:
<ul>
<li>Year only: YYYY (e.g. "2012")</li>
<li>Year and month only: YYYY-MM (e.g. "2012-05")</li>
<li>Full date: YYYY-MM-DD (e.g. "2012-05-01")</li>
<li>Multiple dates:{YYYY, YYYY-MM, YYYY-MM-DD, ...}</li>
<li>Date ranges: YYYY/YYYY</li>
<li> Dates with unknown parts: YYYY-X, YYYY-MM-X, YYYY-MM-DD-X</li>
</ul>');

  $fields = \Drupal::entityTypeManager()
    ->getStorage('field_config')
    ->loadByProperties(['field_type' => 'edtf']);

  // Option keys combine entity type and field name so that addFieldValues()
  // can match a configured field against the entity being indexed.
  $fields_options = [];
  foreach ($fields as $field) {
    $key = $field->getTargetEntityTypeId() . '|' . $field->getName();
    $fields_options[$key] = $this->t(
      '@label (Entity: @entity_type)', [
        '@label' => $field->label(),
        '@entity_type' => $field->getTargetEntityTypeId(),
      ]
    );
  }

  $form['fields'] = [
    '#type' => 'select',
    '#multiple' => TRUE,
    '#title' => $this->t('EDTF Fields'),
    '#description' => $this->t('Select one or more EDTF fields to index.'),
    '#options' => $fields_options,
    '#default_value' => $this->configuration['fields'],
  ];

  $form['open_start_year'] = [
    '#type' => 'number',
    '#title' => $this->t('Open Interval Begin Year'),
    '#description' => $this->t('Sets the beginning year to begin indexing from. Leave blank if you would like to index from year 1000.'),
    '#default_value' => $this->configuration['open_start_year'],
  ];
  $form['open_end_year'] = [
    '#type' => 'number',
    '#title' => $this->t('Open Interval End Year'),
    '#description' => $this->t('Sets the end year to end indexing at. Leave blank if you would like to index up to date 9999.'),
    '#default_value' => $this->configuration['open_end_year'],
  ];

  return $form;
}

/**
 * {@inheritdoc}
 *
 * Rejects a negative start year and an end year that lies before the start
 * year. Blank inputs are treated as "not set" and never produce an error.
 */
public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
  $start = $form_state->getValue('open_start_year');
  $end = $form_state->getValue('open_end_year');

  // A blank number field is submitted as ''. On PHP 8 the expression
  // '' < 0 is evaluated as a string comparison and is TRUE, which would
  // reject the documented "leave blank" case, so blanks are skipped and the
  // remaining values are compared as integers.
  $start_given = $start !== NULL && $start !== '';
  if ($start_given && (int) $start < 0) {
    $form_state->setErrorByName('open_start_year', $this->t('Open start year must be a positive integer.'));
  }

  // A blank start year counts as 0 for the ordering check.
  if (!empty($end) && (int) $end < (int) $start) {
    $form_state->setErrorByName('open_end_year', $this->t('Open end year must be greater than or equal to open start year.'));
  }
}

/**
 * {@inheritdoc}
 *
 * Copies the submitted values into the plugin configuration.
 */
public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
  $this->configuration['fields'] = $form_state->getValue('fields', []);
  // buildConfigurationForm() defines no elements for the two "ignore" flags,
  // so getValue() yields NULL for them; store an explicit boolean instead of
  // persisting NULL into the configuration.
  $this->configuration['ignore_open_start'] = (bool) $form_state->getValue('ignore_open_start', FALSE);
  $this->configuration['ignore_open_end'] = (bool) $form_state->getValue('ignore_open_end', FALSE);
  $this->configuration['open_start_year'] = $form_state->getValue('open_start_year');
  $this->configuration['open_end_year'] = $form_state->getValue('open_end_year');
}

/**
 * {@inheritdoc}
 *
 * Exposes a single multi-valued "edtf_dates" property of type
 * datetime_iso8601. The property is only offered when no datasource is
 * passed; for a specific datasource the result is empty.
 */
public function getPropertyDefinitions(?DatasourceInterface $datasource = NULL) {
  if ($datasource) {
    return [];
  }

  $iso_definition = \Drupal::typedDataManager()->createDataDefinition('datetime_iso8601')
    ->setLabel($this->t('EDTF Dates'))
    ->setDescription($this->t('Indexes single EDTF dates or multiple separate dates.'));

  $property_definition = [
    'label' => $this->t('EDTF Dates'),
    'description' => $this->t('Indexes single EDTF dates or multiple separate dates.'),
    'type' => 'datetime_iso8601',
    'is_list' => TRUE,
    'processor_id' => $this->getPluginId(),
    'data_definition' => $iso_definition,
  ];

  return ['edtf_dates' => new ProcessorProperty($property_definition)];
}

/**
 * {@inheritdoc}
 *
 * Reads every configured EDTF field present on the indexed entity, converts
 * each supported value to a Solr date string, drops dates outside the
 * configured year bounds, sorts the remainder ascending and stores them in
 * the "edtf_dates" index fields.
 */
public function addFieldValues(ItemInterface $item) {
  $entity = $item->getOriginalObject()->getValue();
  // hasField()/get() below exist only on fieldable entities; anything else
  // (for example an item from a non-entity datasource) has nothing to index.
  if (!$entity instanceof \Drupal\Core\Entity\FieldableEntityInterface) {
    return;
  }

  $edtfDates = [];
  foreach ($this->configuration['fields'] ?? [] as $field_key) {
    // Configured keys have the form "<entity_type>|<field_name>".
    if (strpos($field_key, '|') === FALSE) {
      continue;
    }
    [$entity_type, $field_name] = explode('|', $field_key, 2);
    if ($entity->getEntityTypeId() !== $entity_type || !$entity->hasField($field_name)) {
      continue;
    }

    foreach ($entity->get($field_name)->getValue() as $date_item) {
      if (empty($date_item['value'])) {
        continue;
      }
      // Sanitize the input value before processing.
      $value = $this->sanitizeEdtfString($date_item['value']);

      if ($this->isSingleEdtfDate($value)) {
        $edtfDates[] = $this->convertEdtftoSolr($value);
      }
      elseif ($this->isEdtfMultiDate($value)) {
        $dates = $this->convertEdtfMultiDateToSolr($value);
        if ($dates) {
          $edtfDates = array_merge($edtfDates, $dates);
        }
      }
    }
  }

  // Splits "<year>-MM-DDT00:00:00Z" into [int year, string remainder].
  // Taking substr($date, 0, 4) instead would misread negative years and
  // extended years with five or more digits.
  $split = static function ($date) {
    if (preg_match('/^(-?\d+)-(.*)$/', $date, $m)) {
      return [(int) $m[1], $m[2]];
    }
    return [(int) substr($date, 0, 4), (string) substr($date, 4)];
  };

  // The "ignore" flags may be absent from stored configuration, so they are
  // read with empty() rather than by direct key access.
  $ignore_start = !empty($this->configuration['ignore_open_start']);
  $ignore_end = !empty($this->configuration['ignore_open_end']);
  $start_year = (int) ($this->configuration['open_start_year'] ?? 0);
  $end_raw = $this->configuration['open_end_year'] ?? '';
  $has_end = !empty($end_raw);
  $end_year = (int) $end_raw;

  $filteredDates = [];
  foreach ($edtfDates as $date) {
    [$year] = $split($date);
    if (!$ignore_start && $start_year > 0 && $year < $start_year) {
      continue;
    }
    if (!$ignore_end && $has_end && $year > $end_year) {
      continue;
    }
    $filteredDates[] = $date;
  }
  $edtfDates = $filteredDates;

  // Sort dates in ascending order. Comparing the numeric year first and the
  // zero-padded remainder second orders the values chronologically without
  // strtotime(), which returns FALSE for dates it cannot represent.
  usort(
    $edtfDates, static function ($a, $b) use ($split) {
      [$year_a, $rest_a] = $split($a);
      [$year_b, $rest_b] = $split($b);
      if ($year_a !== $year_b) {
        return $year_a < $year_b ? -1 : 1;
      }
      return strcmp($rest_a, $rest_b);
    }
  );

  if (!empty($edtfDates)) {
    $fields = $this->getFieldsHelper()->filterForPropertyPath($item->getFields(), NULL, 'edtf_dates');
    foreach ($fields as $field) {
      $field->setValues($edtfDates);
    }
  }
}

/**
 * Tests whether a value is one (possibly partial) EDTF date.
 *
 * Recognised shapes, each of which may use X as a digit placeholder:
 * - YYYY (e.g. "2012")
 * - YYYY-MM (e.g. "2012-05")
 * - YYYY-MM-DD (e.g. "2012-05-01")
 * A "Y"-prefixed year of five or more digits is accepted as well. For
 * intervals only the part before the first "/" is checked; an open side
 * ("..") is first replaced by the configured bound (1000 / 9999 if unset).
 *
 * @param mixed $value
 *   The value to inspect.
 *
 * @return int|false
 *   The preg_match() result for the candidate (1 on match, 0 otherwise), or
 *   FALSE when $value is not a string.
 */
protected function isSingleEdtfDate($value) {
  if (!is_string($value)) {
    return FALSE;
  }

  $settings = $this->configuration;
  $candidate = $value;
  $hit = [];

  // Open start: "../<end>" becomes "<start bound>/<end>".
  if (preg_match('/^\.\.\/(.+)/', $candidate, $hit)) {
    $floor = empty($settings['open_start_year']) ? '1000' : $settings['open_start_year'];
    $candidate = $floor . '/' . $hit[1];
  }

  // Open end: "<start>/.." becomes "<start>/<end bound>".
  if (preg_match('/(.+)\/\.\.$/', $candidate, $hit)) {
    $ceiling = empty($settings['open_end_year']) ? '9999' : $settings['open_end_year'];
    $candidate = $hit[1] . '/' . $ceiling;
  }

  // For an interval, only its first endpoint is validated.
  if (strpos($candidate, '/') !== FALSE) {
    [$first_endpoint] = explode('/', $candidate);
    $candidate = trim($first_endpoint);
  }

  if (strpos($candidate, 'Y') !== 0) {
    return preg_match('/^[0-9X]{4}(-[0-9X]{2}(-[0-9X]{2})?)?$/', $candidate);
  }

  // Extended year notation: everything after the leading "Y".
  return preg_match('/^-?[0-9]{5,}$/', substr($candidate, 1));
}

/**
 * Tests whether a value is an EDTF set of dates wrapped in curly braces.
 *
 * Accepted format:
 * - {YYYY, YYYY-MM, YYYY-MM-DD, ...}
 * Each member may use X as a digit placeholder.
 *
 * @param mixed $value
 *   The value to inspect.
 *
 * @return bool
 *   TRUE when $value is a string matching the set syntax, FALSE otherwise.
 */
protected function isEdtfMultiDate($value) {
  if (!is_string($value)) {
    return FALSE;
  }
  $matched = preg_match('/^{\s*([0-9X]{4}(-[0-9X]{2}(-[0-9X]{2})?)?)(\s*,\s*[0-9X]{4}(-[0-9X]{2}(-[0-9X]{2})?)?)*\s*}$/', $value);
  return (bool) $matched;
}

/**
 * Converts a single EDTF date to a Solr-compatible date string.
 *
 * Open interval sides ("..") are replaced by the configured bound (1000 /
 * 9999 if unset), only the part before the first "/" is kept, a leading "Y"
 * is dropped, X placeholders are normalized and missing month/day parts are
 * filled with "01". Out-of-range months/days and unexpected shapes are
 * logged as warnings; the value is still returned.
 *
 * @param string $value
 *   The EDTF date string.
 *
 * @return string
 *   The date followed by "T00:00:00Z".
 */
protected function convertEdtftoSolr($value) {
  if (preg_match('/^\.\.\/(.+)/', $value, $matches)) {
    $start_year = !empty($this->configuration['open_start_year'])
      ? $this->configuration['open_start_year']
      : '1000';
    $value = $start_year . '/' . $matches[1];
  }

  if (preg_match('/(.+)\/\.\.$/', $value, $matches)) {
    $end_year = !empty($this->configuration['open_end_year'])
      ? $this->configuration['open_end_year']
      : '9999';
    $value = $matches[1] . '/' . $end_year;
  }

  // For an interval only the first endpoint is converted.
  if (strpos($value, '/') !== FALSE) {
    $parts = explode('/', $value);
    $value = trim($parts[0]);
  }

  // Drop the extended-year marker.
  if (strpos($value, 'Y') === 0) {
    $value = substr($value, 1);
  }

  // Sanitize the EDTF string.
  $value = $this->sanitizeEdtfString($value);

  // Set a leading minus aside before splitting on "-"; otherwise the sign of
  // a negative year is taken for a separator and the year digits end up
  // being validated as a month.
  $sign = '';
  if (strpos($value, '-') === 0) {
    $sign = '-';
    $value = substr($value, 1);
  }

  $value = $this->normalizePlaceholders($value);

  // Validate numeric constraints.
  $parts = explode('-', $value);
  if (count($parts) >= 2) {
    $month = (int) $parts[1];
    if ($month < 1 || $month > 12) {
      \Drupal::logger('edtf_date_processor')->warning('Month out of acceptable range in date: "@value". Month should be between 01 and 12.', ['@value' => $value]);
    }
  }
  if (count($parts) == 3) {
    $day = (int) $parts[2];
    if ($day < 1 || $day > 31) {
      \Drupal::logger('edtf_date_processor')->warning('Day out of acceptable range in date: "@value". Day should be between 01 and 31.', ['@value' => $value]);
    }
  }

  // Ensure complete date format.
  if (preg_match('/^\d{4}-\d{2}-\d{2}$/', $value)) {
    // Already a full date.
  }
  elseif (preg_match('/^\d{4}-\d{2}$/', $value)) {
    $value .= '-01';
  }
  elseif (preg_match('/^\d{4}$/', $value)) {
    $value .= '-01-01';
  }
  // Handle extended year notation (e.g. "YYYYY").
  elseif (preg_match('/^-?[0-9]{5,}$/', $value)) {
    $value .= '-01-01';
    \Drupal::logger('edtf_date_processor')->warning('This is an extended year date, this might parse unexpectedly and cause issues.');
  }

  $value = $sign . $value;

  // Check if the date is the expected format after appending missing parts.
  // Years of more than four digits are allowed here so that an extended year
  // padded just above does not also log a second, spurious warning.
  if (!preg_match('/^-?\d{4,}-\d{2}-\d{2}$/', $value)) {
    \Drupal::logger('edtf_date_processor')->warning('Date value after appending missing parts does not match the expected pattern: "@value".', ['@value' => $value]);
  }

  return $value . 'T00:00:00Z';
}

/**
 * Converts an EDTF set of dates to a list of Solr-compatible date strings.
 *
 * @param string $value
 *   The EDTF set, e.g. "{2012, 2013-05}".
 *
 * @return array
 *   One converted date per date found in $value, in order of appearance;
 *   an empty array when none is found.
 */
protected function convertEdtfMultiDateToSolr($value) {
  $found = [];
  if (!preg_match_all('/[0-9X]{4}(?:-[0-9X]{2}(?:-[0-9X]{2})?)?/', $value, $found)) {
    return [];
  }

  return array_map(
    function ($raw_date) {
      return $this->convertEdtftoSolr($raw_date);
    },
    $found[0]
  );
}

/**
 * Strips the characters "~", "?" and "%" from an EDTF string.
 *
 * @param string $value
 *   The EDTF string.
 *
 * @return string
 *   $value with every occurrence of those three characters removed.
 */
protected function sanitizeEdtfString($value) {
  $unwanted = ['~', '?', '%'];
  $cleaned = str_replace($unwanted, '', $value);
  return $cleaned;
}

/**
 * Replaces X placeholders in a hyphen-separated EDTF string.
 *
 * In the first segment (the year) every X becomes 0. In the second and
 * third segments (month and day) every X becomes 0 as well, and a segment
 * that ends up as "00" - which includes "XX" - is set to "01". Any further
 * segments are left untouched.
 *
 * @param string $value
 *   The EDTF string.
 *
 * @return string
 *   The string with placeholders substituted.
 */
protected function normalizePlaceholders($value) {
  $segments = explode('-', $value);

  foreach ($segments as $position => $segment) {
    if ($position > 2) {
      break;
    }
    $filled = str_replace('X', '0', $segment);
    if ($position > 0 && $filled === '00') {
      // Month and day have no zero value; fall back to the first one.
      $filled = '01';
    }
    $segments[$position] = $filled;
  }

  return implode('-', $segments);
}

}
Loading