diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 39e90c6..e4e1cda 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -238,6 +238,33 @@ function convertTimeFormat($time) { } } +/** + * If a date value has a timezone offset, normalize it. + * @param string $dtValue + * @return string isolated, normalized TZ offset for implied TZ for other dt- properties + */ +function normalizeTimezoneOffset(&$dtValue) { + preg_match('/Z|[+-]\d{1,2}:?(\d{2})?$/i', $dtValue, $matches); + + if (empty($matches)) { + return null; + } + + if ( $matches[0] != 'Z' ) { + $timezoneString = str_replace(':', '', $matches[0]); + $plus_minus = substr($timezoneString, 0, 1); + $timezoneOffset = substr($timezoneString, 1); + if ( strlen($timezoneOffset) <= 2 ) { + $timezoneOffset .= '00'; + } + $timezoneOffset = str_pad($timezoneOffset, 4, 0, STR_PAD_LEFT); + $timezoneOffset = $plus_minus . $timezoneOffset; + $dtValue = preg_replace('/Z?[+-]\d{1,2}:?(\d{2})?$/i', $timezoneOffset, $dtValue); + } + + return $timezoneOffset; +} + function applySrcsetUrlTransformation($srcset, $transformation) { return implode(', ', array_filter(array_map(function ($srcsetPart) use ($transformation) { $parts = explode(" \t\n\r\0\x0B", trim($srcsetPart), 2); @@ -649,9 +676,10 @@ public function parseU(\DOMElement $u) { * * @param DOMElement $dt The element to parse * @param array $dates Array of dates processed so far + * @param string $impliedTimezone * @return string The datetime string found */ - public function parseDT(\DOMElement $dt, &$dates = array()) { + public function parseDT(\DOMElement $dt, &$dates = array(), &$impliedTimezone = null) { // Check for value-class pattern $valueClassChildren = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ") or contains(concat(" ", @class, " "), " value-title ")]', $dt); $dtValue = false; @@ -663,73 +691,96 @@ public function parseDT(\DOMElement $dt, &$dates = array()) { foreach ($valueClassChildren as $e) { if (strstr(' ' . $e->getAttribute('class') . ' ', ' value-title ')) { $title = $e->getAttribute('title'); - if (!empty($title)) + if (!empty($title)) { $dateParts[] = $title; + } } elseif ($e->tagName == 'img' or $e->tagName == 'area') { // Use @alt $alt = $e->getAttribute('alt'); - if (!empty($alt)) + if (!empty($alt)) { $dateParts[] = $alt; + } } elseif ($e->tagName == 'data') { // Use @value, otherwise innertext $value = $e->hasAttribute('value') ? $e->getAttribute('value') : unicodeTrim($e->nodeValue); - if (!empty($value)) + if (!empty($value)) { $dateParts[] = $value; + } } elseif ($e->tagName == 'abbr') { // Use @title, otherwise innertext $title = $e->hasAttribute('title') ? $e->getAttribute('title') : unicodeTrim($e->nodeValue); - if (!empty($title)) + if (!empty($title)) { $dateParts[] = $title; + } } elseif ($e->tagName == 'del' or $e->tagName == 'ins' or $e->tagName == 'time') { // Use @datetime if available, otherwise innertext $dtAttr = ($e->hasAttribute('datetime')) ? $e->getAttribute('datetime') : unicodeTrim($e->nodeValue); - if (!empty($dtAttr)) + if (!empty($dtAttr)) { $dateParts[] = $dtAttr; + } } else { - if (!empty($e->nodeValue)) + if (!empty($e->nodeValue)) { $dateParts[] = unicodeTrim($e->nodeValue); + } } } // Look through dateParts $datePart = ''; $timePart = ''; + $timezonePart = ''; foreach ($dateParts as $part) { // Is this part a full ISO8601 datetime? - if (preg_match('/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(?::\d{2})?(?:Z?[+|-]\d{2}:?\d{2})?$/', $part)) { + if (preg_match('/^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}(:\d{2})?(Z|[+-]\d{2}:?\d{2})?$/', $part)) { // Break completely, we’ve got our value. $dtValue = $part; break; } else { // Is the current part a valid time(+TZ?) AND no other time representation has been found? - if ((preg_match('/\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $part) or preg_match('/\d{1,2}[a|p]m/', $part)) and empty($timePart)) { + if ((preg_match('/^\d{1,2}:\d{2}(:\d{2})?(Z|[+-]\d{1,2}:?\d{2})?$/', $part) or preg_match('/^\d{1,2}(:\d{2})?(:\d{2})?[ap]\.?m\.?$/i', $part)) and empty($timePart)) { $timePart = $part; - } elseif (preg_match('/\d{4}-\d{2}-\d{2}/', $part) and empty($datePart)) { + + $timezoneOffset = normalizeTimezoneOffset($timePart); + if (!$impliedTimezone && $timezoneOffset) { + $impliedTimezone = $timezoneOffset; + } + } elseif (preg_match('/^\d{4}-\d{2}-\d{2}$/', $part) and empty($datePart)) { // Is the current part a valid date AND no other date representation has been found? $datePart = $part; + } elseif (preg_match('/^(Z|[+-]\d{1,2}:?(\d{2})?)$/', $part) and empty($timezonePart)) { + $timezonePart = $part; + + $timezoneOffset = normalizeTimezoneOffset($timezonePart); + if (!$impliedTimezone && $timezoneOffset) { + $impliedTimezone = $timezoneOffset; + } } if ( !empty($datePart) && !in_array($datePart, $dates) ) { $dates[] = $datePart; } + if (!empty($timezonePart) && !empty($timePart)) { + $timePart .= $timezonePart; + } + $dtValue = ''; if ( empty($datePart) && !empty($timePart) ) { $timePart = convertTimeFormat($timePart); - $dtValue = unicodeTrim($timePart, 'T'); + $dtValue = unicodeTrim($timePart); } else if ( !empty($datePart) && empty($timePart) ) { $dtValue = rtrim($datePart, 'T'); } else { $timePart = convertTimeFormat($timePart); - $dtValue = rtrim($datePart, 'T') . 'T' . unicodeTrim($timePart, 'T'); + $dtValue = rtrim($datePart, 'T') . ' ' . unicodeTrim($timePart); } } } @@ -739,36 +790,54 @@ public function parseDT(\DOMElement $dt, &$dates = array()) { // Use @alt // Is it an entire dt? $alt = $dt->getAttribute('alt'); - if (!empty($alt)) + if (!empty($alt)) { $dtValue = $alt; + } } elseif (in_array($dt->tagName, array('data'))) { // Use @value, otherwise innertext // Is it an entire dt? $value = $dt->getAttribute('value'); - if (!empty($value)) + if (!empty($value)) { $dtValue = $value; - else + } + else { $dtValue = $this->textContent($dt); + } } elseif ($dt->tagName == 'abbr') { // Use @title, otherwise innertext // Is it an entire dt? $title = $dt->getAttribute('title'); - if (!empty($title)) + if (!empty($title)) { $dtValue = $title; - else + } + else { $dtValue = $this->textContent($dt); + } } elseif ($dt->tagName == 'del' or $dt->tagName == 'ins' or $dt->tagName == 'time') { // Use @datetime if available, otherwise innertext // Is it an entire dt? $dtAttr = $dt->getAttribute('datetime'); - if (!empty($dtAttr)) + if (!empty($dtAttr)) { $dtValue = $dtAttr; - else + } + else { $dtValue = $this->textContent($dt); + } + } else { $dtValue = $this->textContent($dt); } + // if the dtValue is not just YYYY-MM-DD, normalize the timezone offset + if (!preg_match('/^(\d{4}-\d{2}-\d{2})$/', $dtValue)) { + $timezoneOffset = normalizeTimezoneOffset($dtValue); + if (!$impliedTimezone && $timezoneOffset) { + $impliedTimezone = $timezoneOffset; + } + } + + $dtValue = unicodeTrim($dtValue); + if (preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches)) { $dates[] = $matches[0]; } @@ -778,9 +847,14 @@ public function parseDT(\DOMElement $dt, &$dates = array()) { * if $dtValue is only a time and there are recently parsed dates, * form the full date-time using the most recently parsed dt- value */ - if ((preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates)) { + if ((preg_match('/^\d{1,2}:\d{2}(:\d{2})?(Z|[+-]\d{2}:?\d{2}?)?$/', $dtValue) or preg_match('/^\d{1,2}(:\d{2})?(:\d{2})?[ap]\.?m\.?$/i', $dtValue)) && !empty($dates)) { + $timezoneOffset = normalizeTimezoneOffset($dtValue); + if (!$impliedTimezone && $timezoneOffset) { + $impliedTimezone = $timezoneOffset; + } + $dtValue = convertTimeFormat($dtValue); - $dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T'); + $dtValue = end($dates) . ' ' . unicodeTrim($dtValue); } return $dtValue; @@ -850,6 +924,7 @@ public function parseH(\DOMElement $e, $is_backcompat = false) { $return = array(); $children = array(); $dates = array(); + $impliedTimezone = null; // each rel-bookmark with an href attribute foreach ( $this->xpath->query('.//a[contains(concat(" ",normalize-space(@rel)," ")," bookmark ") and @href]', $e) as $el ) @@ -949,25 +1024,37 @@ public function parseH(\DOMElement $e, $is_backcompat = false) { $this->elementPrefixParsed($u, 'u'); } + $temp_dates = array(); + // Handle dt-* foreach ($this->xpath->query('.//*[contains(concat(" ", @class), " dt-")]', $e) as $dt) { if ($this->isElementParsed($dt, 'dt')) { continue; } - $dtValue = $this->parseDT($dt, $dates); + $dtValue = $this->parseDT($dt, $dates, $impliedTimezone); if ($dtValue) { // Add the value to the array for dt- properties foreach (mfNamesFromElement($dt, 'dt-') as $propName) { - $return[$propName][] = $dtValue; + $temp_dates[$propName][] = $dtValue; } } - // Make sure this sub-mf won’t get parsed as a top level mf $this->elementPrefixParsed($dt, 'dt'); } + foreach ($temp_dates as $propName => $data) { + foreach ( $data as $dtValue ) { + // var_dump(preg_match('/[+-]\d{2}(\d{2})?$/i', $dtValue)); + if ( $impliedTimezone && preg_match('/[+-]\d{2}(\d{2})?$/i', $dtValue, $matches) == 0 ) { + $dtValue .= $impliedTimezone; + } + + $return[$propName][] = $dtValue; + } + } + // Handle e-* foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," e-")]', $e) as $em) { if ($this->isElementParsed($em, 'e')) { diff --git a/tests/Mf2/ClassicMicroformatsTest.php b/tests/Mf2/ClassicMicroformatsTest.php index e1c5514..88797c7 100644 --- a/tests/Mf2/ClassicMicroformatsTest.php +++ b/tests/Mf2/ClassicMicroformatsTest.php @@ -167,8 +167,8 @@ public function test_vevent() { $this->assertEquals('XYZ Project Review', $output['items'][0]['properties']['name'][0]); $this->assertEquals('Project XYZ Review Meeting', $output['items'][0]['properties']['description'][0]); $this->assertEquals('http://example.com/xyz-meeting', $output['items'][0]['properties']['url'][0]); - $this->assertEquals('1998-03-12T08:30', $output['items'][0]['properties']['start'][0]); - $this->assertEquals('1998-03-12T09:30', $output['items'][0]['properties']['end'][0]); + $this->assertEquals('1998-03-12 08:30-0500', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('1998-03-12 09:30-0500', $output['items'][0]['properties']['end'][0]); } diff --git a/tests/Mf2/ParseDTTest.php b/tests/Mf2/ParseDTTest.php index aefbc17..7bb40ac 100644 --- a/tests/Mf2/ParseDTTest.php +++ b/tests/Mf2/ParseDTTest.php @@ -127,7 +127,7 @@ public function testYYYY_MM_DD__HH_MM() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2012-10-07T21:18', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2012-10-07 21:18', $output['items'][0]['properties']['start'][0]); } /** @@ -140,7 +140,7 @@ public function testAbbrYYYY_MM_DD__HH_MM() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2012-10-07T21:18', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2012-10-07 21:18', $output['items'][0]['properties']['start'][0]); } /** @@ -153,7 +153,7 @@ public function testYYYY_MM_DD__HHpm() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2012-10-07T21:00', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2012-10-07 21:00', $output['items'][0]['properties']['start'][0]); } /** @@ -166,7 +166,7 @@ public function testYYYY_MM_DD__HH_MMpm() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2012-10-07T21:00', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2012-10-07 21:00', $output['items'][0]['properties']['start'][0]); } /** @@ -179,7 +179,7 @@ public function testYYYY_MM_DD__HH_MM_SSpm() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2012-10-07T21:00:00', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2012-10-07 21:00:00', $output['items'][0]['properties']['start'][0]); } /** @@ -194,8 +194,8 @@ public function testImpliedDTEndWithValueClass() { $this->assertArrayHasKey('start', $output['items'][0]['properties']); $this->assertArrayHasKey('end', $output['items'][0]['properties']); - $this->assertEquals('2014-06-04T18:30', $output['items'][0]['properties']['start'][0]); - $this->assertEquals('2014-06-04T19:30', $output['items'][0]['properties']['end'][0]); + $this->assertEquals('2014-06-04 18:30', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2014-06-04 19:30', $output['items'][0]['properties']['end'][0]); } /** @@ -211,8 +211,8 @@ public function testImpliedDTEndWithoutValueClass() { $this->assertArrayHasKey('start', $output['items'][0]['properties']); $this->assertArrayHasKey('end', $output['items'][0]['properties']); - $this->assertEquals('2014-06-05T18:31', $output['items'][0]['properties']['start'][0]); - $this->assertEquals('2014-06-05T19:31', $output['items'][0]['properties']['end'][0]); + $this->assertEquals('2014-06-05 18:31', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2014-06-05 19:31', $output['items'][0]['properties']['end'][0]); } /** @@ -229,7 +229,7 @@ public function testImpliedDTEndUsingNonValueClassDTStart() { $this->assertArrayHasKey('start', $output['items'][0]['properties']); $this->assertArrayHasKey('end', $output['items'][0]['properties']); $this->assertEquals('2014-06-05T18:31', $output['items'][0]['properties']['start'][0]); - $this->assertEquals('2014-06-05T19:31', $output['items'][0]['properties']['end'][0]); + $this->assertEquals('2014-06-05 19:31', $output['items'][0]['properties']['end'][0]); } /** @@ -242,7 +242,8 @@ public function testDTStartOnly() { $output = $parser->parse(); $this->assertArrayHasKey('start', $output['items'][0]['properties']); - $this->assertEquals('2014-06-06T18:32', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2014-06-06 18:32', $output['items'][0]['properties']['start'][0]); + $this->assertArrayNotHasKey('end', $output['items'][0]['properties']); } /** @@ -258,4 +259,187 @@ public function testDTStartDateOnly() { $this->assertEquals('2014-06-07', $output['items'][0]['properties']['start'][0]); } + /** + * + */ + public function testNormalizeTZOffset() { + $input = '
+ , from + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-05-27 20:57-0700', $output['items'][0]['properties']['start'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/115 + */ + public function testDoNotAddT() { + $input = '
+ + , from + + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2009-06-26 19:00:00-0800', $output['items'][0]['properties']['start'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/115 + */ + public function testPreserrveTIfAuthored() { + $input = '
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2009-06-26T19:01-0800', $output['items'][0]['properties']['start'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/126 + */ + public function testDtVCPTimezone() { + $input = '
+ HomebrewWebsiteClub Berlin will be next on + + 2017-05-31, from + 19:00 (UTC+02:00) + to 21:00.
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-05-31 19:00+0200', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-05-31 21:00+0200', $output['items'][0]['properties']['end'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/126 + */ + public function testDtVCPTimezoneShort() { + $input = '
+ HomebrewWebsiteClub Berlin will be next on + + 2017-05-31, from + 19:00 (UTC+2) + to 21:00.
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-05-31 19:00+0200', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-05-31 21:00+0200', $output['items'][0]['properties']['end'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/126 + */ + public function testDtVCPTimezoneNoLeadingZero() { + $input = '
+ + 2017-06-17 + 22:00-700 + + + 2017-06-17 + 23:00-700 + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-06-17 22:00-0700', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-06-17 23:00-0700', $output['items'][0]['properties']['end'][0]); + } + + /** + * @see https://github.com/microformats/microformats2-parsing/issues/4 + */ + public function testImplyTimezoneFromStart() { + $input = '
to
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2014-09-11 13:30-0700', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2014-09-11 15:30-0700', $output['items'][0]['properties']['end'][0]); + } + + /** + * @see https://github.com/microformats/microformats2-parsing/issues/4 + */ + public function testImplyTimezoneFromEnd() { + $input = '
to
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2014-09-11 13:30-0700', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2014-09-11 15:30-0700', $output['items'][0]['properties']['end'][0]); + } + + /** + * + */ + public function testAMPMWithPeriods() { + $input = '
+ + 2017-06-11 + 10:00P.M. + + + 2017-06-12 + 02:00a.m. + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-06-11 22:00', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-06-12 02:00', $output['items'][0]['properties']['end'][0]); + } + + /** + * + */ + public function testAMPMWithoutPeriods() { + $input = '
+ + 2017-06-17 + 10:30pm + + + 2017-06-18 + 02:30AM + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-06-17 22:30', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-06-18 02:30', $output['items'][0]['properties']['end'][0]); + } + + /** + * + */ + public function testDtVCPTimeAndTimezone() { + $input = '
+ + 2017-06-17 + 13:30-07:00 + + + 2017-06-17 + 15:30-0700 + +
'; + $parser = new Parser($input); + $output = $parser->parse(); + + $this->assertEquals('2017-06-17 13:30-0700', $output['items'][0]['properties']['start'][0]); + $this->assertEquals('2017-06-17 15:30-0700', $output['items'][0]['properties']['end'][0]); + } + } diff --git a/tests/Mf2/ParseValueClassTitleTest.php b/tests/Mf2/ParseValueClassTitleTest.php index 40c24e1..de79ec6 100644 --- a/tests/Mf2/ParseValueClassTitleTest.php +++ b/tests/Mf2/ParseValueClassTitleTest.php @@ -71,7 +71,7 @@ class="dt-published published dt-updated updated u-url u-uid"> $output = $parser->parse(); $this->assertArrayHasKey('published', $output['items'][0]['properties']); - $this->assertEquals('2013-06-27T10:17', $output['items'][0]['properties']['published'][0]); + $this->assertEquals('2013-06-27 10:17', $output['items'][0]['properties']['published'][0]); } /** @@ -103,6 +103,6 @@ public function testIgnoresValueClassNestedFurtherThanChild() { public function testValueClassDtMatchesSingleDigitTimeComponent() { $test = '
,
'; $result = Mf2\parse($test); - $this->assertEquals('2013-02-01T6:01', $result['items'][0]['properties']['published'][0]); + $this->assertEquals('2013-02-01 6:01', $result['items'][0]['properties']['published'][0]); } }