Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ramsey/mimeparse
Browse files Browse the repository at this point in the history
* 'master' of github.com:ramsey/mimeparse:
  More accurately differentiate between media-ranges and mime-types.
  Minor comment improvements.
  Wrap comments at 80 characters wherever possible.
  Make difference between parseMediaRange and parseMimeType more clear.
  Clarify exception message.
  Improve and correct some documentation.
  Use order of $supported array instead of $tieBreaker to break ties.
  Fix return value documentation for qualityAndFitnessParsed.
  Simplify process of rejecting unacceptable types.
  Rename fitnessAndQualityParsed to represent order of returned array.
  Use the term "generic subtype" instead of "format".
  Make some comments more PHP-centric.
  • Loading branch information
Ben Ramsey committed Sep 18, 2012
2 parents 8c53c4f + 563b9cd commit 4de509f
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 136 deletions.
175 changes: 80 additions & 95 deletions src/Bitworking/Mimeparse.php
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
<?php
/**
* Mimeparse class. This class provides basic functions for handling mime-types. It can
* handle matching mime-types against a list of media-ranges. See section
* 14.1 of the HTTP specification [RFC 2616] for a complete explanation.
* Mimeparse class. Provides basic functions for handling mime-types. It can
* match mime-types against a list of media-ranges. See section 14.1 of the
* HTTP specification [RFC 2616] for a complete explanation.
*
* It's just a port to php from original Python code (http://code.google.com/p/mimeparse/).
* It's a PHP port of the original Python code
* (http://code.google.com/p/mimeparse/).
*
* Ported from version 0.1.2. Comments are mostly excerpted from the original.
*
Expand All @@ -17,27 +18,30 @@
class Mimeparse
{
/**
* Parses a mime-type and returns an array with its components.
* Parses a media-range and returns an array with its components.
*
* The array returned contains:
* The returned array contains:
*
* 1. type: The type categorization.
* 2. subtype: The subtype categorization.
* 3. params: A hash of all the parameters for the media range.
* 4. format: The content format.
* 3. params: An associative array of all the parameters for the
* media-range.
* 4. generic subtype: A more generic subtype, if one is present. See
* http://tools.ietf.org/html/rfc3023#appendix-A.12
*
* For example, the media range "application/xhtml+xml;q=0.5" would
* get parsed into:
* For example, the media-range "application/xhtml+xml;q=0.5" would get
* parsed into:
*
* array("application", "xhtml", array( "q" => "0.5" ), "xml")
* array("application", "xhtml+xml", array( "q" => "0.5" ), "xml")
*
* @param string $mimeType
* @return array ($type, $subtype, $params)
* @throws UnexpectedValueException when $mimeType does not include a valid subtype
* @param string $mediaRange
* @return array ($type, $subtype, $params, $genericSubtype)
* @throws UnexpectedValueException when $mediaRange does not include a
* valid subtype
*/
public static function parseMimeType($mimeType)
public static function parseMediaRange($mediaRange)
{
$parts = explode(';', $mimeType);
$parts = explode(';', $mediaRange);

$params = array();
foreach ($parts as $i => $param) {
Expand All @@ -49,78 +53,72 @@ public static function parseMimeType($mimeType)

$fullType = trim($parts[0]);

// Java URLConnection class sends an Accept header that includes a single "*"
// Turn it into a legal wildcard.
// Java URLConnection class sends an Accept header that includes a
// single "*". Turn it into a legal wildcard.
if ($fullType == '*') {
$fullType = '*/*';
}

list($type, $subtype) = explode('/', $fullType);

if (!$subtype) {
throw new \UnexpectedValueException('malformed mime type');
throw new \UnexpectedValueException('Malformed media-range: '.$mediaRange);
}

if (false !== strpos($subtype, '+')) {
// don't rewrite subtype to prevent compatibility issues
list(/*$subtype*/, $format) = explode('+', $subtype, 2);
$plusPos = strpos($subtype, '+');
if (false !== $plusPos) {
$genericSubtype = substr($subtype, $plusPos + 1);
} else {
$format = $subtype;
$genericSubtype = $subtype;
}

return array(trim($type), trim($subtype), $params, $format);
return array(trim($type), trim($subtype), $params, $genericSubtype);
}


/**
* Carves up a media range and returns an Array of the
* [type, subtype, params] where "params" is a Hash of all
* the parameters for the media range.
*
* For example, the media range "application/*;q=0.5" would
* get parsed into:
*
* array("application", "*", ( "q", "0.5" ))
* Parses a media-range via Mimeparse::parseMediaRange() and guarantees that
* there is a value for the "q" param, filling it in with a proper default
* if necessary.
*
* In addition this function also guarantees that there
* is a value for "q" in the params dictionary, filling it
* in with a proper default if necessary.
*
* @param string $range
* @return array ($type, $subtype, $params)
* @param string $mediaRange
* @return array ($type, $subtype, $params, $genericSubtype)
*/
protected static function parseMediaRange($range)
protected static function parseAndNormalizeMediaRange($mediaRange)
{
list($type, $subtype, $params) = self::parseMimeType($range);
$parsedMediaRange = self::parseMediaRange($mediaRange);
$params = $parsedMediaRange[2];

if (!isset($params['q'])
|| !is_numeric($params['q'])
|| floatval($params['q']) > 1
|| floatval($params['q']) < 0
) {
$params['q'] = '1';
$parsedMediaRange[2]['q'] = '1';
}

return array($type, $subtype, $params);
return $parsedMediaRange;
}

/**
* Find the best match for a given mime-type against a list of
* media-ranges that have already been parsed by Mimeparse::parseMediaRange()
* media-ranges that have already been parsed by
* Mimeparse::parseAndNormalizeMediaRange()
*
* Returns the fitness and the "q" quality parameter of the best match, or an
* array [-1, 0] if no match was found. Just as for Mimeparse::quality(),
* $parsedRanges must be an Enumerable of parsed media-ranges.
* Returns the fitness and the "q" quality parameter of the best match, or
* an array [-1, 0] if no match was found. Just as for
* Mimeparse::quality(), $parsedRanges must be an array of parsed
* media-ranges.
*
* @param string $mimeType
* @param array $parsedRanges
* @return array ($bestFitness, $bestFitQuality)
* @return array ($bestFitQuality, $bestFitness)
*/
protected static function fitnessAndQualityParsed($mimeType, $parsedRanges)
protected static function qualityAndFitnessParsed($mimeType, $parsedRanges)
{
$bestFitness = -1;
$bestFitQuality = 0;
list($targetType, $targetSubtype, $targetParams) = self::parseMediaRange($mimeType);
list($targetType, $targetSubtype, $targetParams) = self::parseAndNormalizeMediaRange($mimeType);

foreach ($parsedRanges as $item) {
list($type, $subtype, $params) = $item;
Expand Down Expand Up @@ -151,25 +149,26 @@ protected static function fitnessAndQualityParsed($mimeType, $parsedRanges)

/**
* Find the best match for a given mime-type against a list of
* media-ranges that have already been parsed by Mimeparse::parseMediaRange()
* media-ranges that have already been parsed by
* Mimeparse::parseAndNormalizeMediaRange()
*
* Returns the "q" quality parameter of the best match, 0 if no match
* was found. This function behaves the same as Mimeparse::quality() except that
* $parsedRanges must be an Enumerable of parsed media-ranges.
* Returns the "q" quality parameter of the best match, 0 if no match was
* found. This function behaves the same as Mimeparse::quality() except
* that $parsedRanges must be an array of parsed media-ranges.
*
* @param string $mimeType
* @param array $parsedRanges
* @return float $q
*/
protected static function qualityParsed($mimeType, $parsedRanges)
{
list($q, $fitness) = self::fitnessAndQualityParsed($mimeType, $parsedRanges);
list($q, $fitness) = self::qualityAndFitnessParsed($mimeType, $parsedRanges);
return $q;
}

/**
* Returns the quality "q" of a mime-type when compared against
* the media-ranges in ranges. For example:
* Returns the quality "q" of a mime-type when compared against the
* media-ranges in ranges. For example:
*
* Mimeparse::quality("text/html", "text/*;q=0.3, text/html;q=0.7,
* text/html;level=1, text/html;level=2;q=0.4, *\/*;q=0.5")
Expand All @@ -184,71 +183,57 @@ public static function quality($mimeType, $ranges)
$parsedRanges = explode(',', $ranges);

foreach ($parsedRanges as $i => $r) {
$parsedRanges[$i] = self::parseMediaRange($r);
$parsedRanges[$i] = self::parseAndNormalizeMediaRange($r);
}

return self::qualityParsed($mimeType, $parsedRanges);
}

/**
* Takes a list of supported mime-types and finds the best match
* for all the media-ranges listed in header. The value of header
* must be a string that conforms to the format of the HTTP Accept:
* header. The value of supported is an Enumerable of mime-types
* Takes a list of supported mime-types and finds the best match for all
* the media-ranges listed in header. The value of $header must be a
* string that conforms to the format of the HTTP Accept: header. The
* value of $supported is an array of mime-types.
*
* In case of ties the mime-type with the lowest index in $supported will
* be used.
*
* Mimeparse::bestMatch(array("application/xbel+xml", "text/xml"), "text/*;q=0.5,*\/*; q=0.1")
* => "text/xml"
*
* @param array $supported
* @param string $header
* @param string $tieBreaker In case of a tie, this mime-type is preferred
* @return mixed $mimeType or NULL
*/
public static function bestMatch($supported, $header, $tieBreaker = null)
public static function bestMatch($supported, $header)
{
$parsedHeader = explode(',', $header);

foreach ($parsedHeader as $i => $r) {
$parsedHeader[$i] = self::parseMediaRange($r);
$parsedHeader[$i] = self::parseAndNormalizeMediaRange($r);
}

$weightedMatches = array();
foreach ($supported as $mimeType) {
$weightedMatches[] = array(
self::fitnessAndQualityParsed($mimeType, $parsedHeader),
$mimeType
);
}

// If the best fit quality is 0 for anything, then it is
// not acceptable for the client; remove it from the list
// of weighted matches.
$unacceptableTypes = array();
foreach ($weightedMatches as $k => $v) {
if (empty($v[0][0])) {
$unacceptableTypes[] = $k;
foreach ($supported as $index => $mimeType) {
list($quality, $fitness) = self::qualityAndFitnessParsed($mimeType, $parsedHeader);
if (!empty($quality)) {
// Mime-types closer to the beginning of the array are
// preferred. This preference score is used to break ties.
$preference = 0 - $index;
$weightedMatches[] = array(
array($quality, $fitness, $preference),
$mimeType
);
}
}
foreach ($unacceptableTypes as $weightedMatchKey) {
unset($weightedMatches[$weightedMatchKey]);
}

// Note that since fitness and preference are present in
// $weightedMatches they will also be used when sorting (after quality
// level).
array_multisort($weightedMatches);
$a = array_pop($weightedMatches);

// If there's a tie breaker specified, see if we have any ties
// and then break them with the $tieBreaker
if ($tieBreaker) {
array_push($weightedMatches, $a);
$ties = array_filter($weightedMatches, function ($val) use ($a) {
return ($val[0] == $a[0]);
});
if (count($ties) > 1 && in_array(array($a[0], $tieBreaker), $ties)) {
return $tieBreaker;
}
}
$firstChoice = array_pop($weightedMatches);

return (empty($a[0][0]) ? null : $a[1]);
return (empty($firstChoice[0][0]) ? null : $firstChoice[1]);
}

/**
Expand Down
Loading

0 comments on commit 4de509f

Please sign in to comment.