// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <map> #include <string> #include "net/base/mime_util.h" #include "net/base/platform_mime_util.h" #include "base/hash_tables.h" #include "base/lazy_instance.h" #include "base/logging.h" #include "base/string_split.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" using std::string; namespace net { // Singleton utility class for mime types. class MimeUtil : public PlatformMimeUtil { public: bool GetMimeTypeFromExtension(const FilePath::StringType& ext, std::string* mime_type) const; bool GetMimeTypeFromFile(const FilePath& file_path, std::string* mime_type) const; bool IsSupportedImageMimeType(const char* mime_type) const; bool IsSupportedMediaMimeType(const char* mime_type) const; bool IsSupportedNonImageMimeType(const char* mime_type) const; bool IsSupportedJavascriptMimeType(const char* mime_type) const; bool IsViewSourceMimeType(const char* mime_type) const; bool IsSupportedMimeType(const std::string& mime_type) const; bool MatchesMimeType(const std::string &mime_type_pattern, const std::string &mime_type) const; bool AreSupportedMediaCodecs(const std::vector<std::string>& codecs) const; void ParseCodecString(const std::string& codecs, std::vector<std::string>* codecs_out, bool strip); bool IsStrictMediaMimeType(const std::string& mime_type) const; bool IsSupportedStrictMediaMimeType(const std::string& mime_type, const std::vector<std::string>& codecs) const; private: friend struct base::DefaultLazyInstanceTraits<MimeUtil>; MimeUtil() { InitializeMimeTypeMaps(); } // For faster lookup, keep hash sets. 
void InitializeMimeTypeMaps(); typedef base::hash_set<std::string> MimeMappings; MimeMappings image_map_; MimeMappings media_map_; MimeMappings non_image_map_; MimeMappings javascript_map_; MimeMappings view_source_map_; MimeMappings codecs_map_; typedef std::map<std::string, base::hash_set<std::string> > StrictMappings; StrictMappings strict_format_map_; }; // class MimeUtil static base::LazyInstance<MimeUtil> g_mime_util(base::LINKER_INITIALIZED); struct MimeInfo { const char* mime_type; const char* extensions; // comma separated list }; static const MimeInfo primary_mappings[] = { { "text/html", "html,htm" }, { "text/css", "css" }, { "text/xml", "xml" }, { "image/gif", "gif" }, { "image/jpeg", "jpeg,jpg" }, { "image/webp", "webp" }, { "image/png", "png" }, { "video/mp4", "mp4,m4v" }, { "audio/x-m4a", "m4a" }, { "audio/mp3", "mp3" }, { "video/ogg", "ogv,ogm" }, { "audio/ogg", "ogg,oga" }, { "video/webm", "webm" }, { "audio/webm", "webm" }, { "audio/wav", "wav" }, { "application/xhtml+xml", "xhtml,xht" }, { "application/x-chrome-extension", "crx" } }; static const MimeInfo secondary_mappings[] = { { "application/octet-stream", "exe,com,bin" }, { "application/gzip", "gz" }, { "application/pdf", "pdf" }, { "application/postscript", "ps,eps,ai" }, { "application/x-javascript", "js" }, { "image/bmp", "bmp" }, { "image/x-icon", "ico" }, { "image/jpeg", "jfif,pjpeg,pjp" }, { "image/tiff", "tiff,tif" }, { "image/x-xbitmap", "xbm" }, { "image/svg+xml", "svg,svgz" }, { "message/rfc822", "eml" }, { "text/plain", "txt,text" }, { "text/html", "shtml,ehtml" }, { "application/rss+xml", "rss" }, { "application/rdf+xml", "rdf" }, { "text/xml", "xsl,xbl" }, { "application/vnd.mozilla.xul+xml", "xul" }, { "application/x-shockwave-flash", "swf,swl" } }; static const char* FindMimeType(const MimeInfo* mappings, size_t mappings_len, const char* ext) { size_t ext_len = strlen(ext); for (size_t i = 0; i < mappings_len; ++i) { const char* extensions = mappings[i].extensions; for (;;) { 
size_t end_pos = strcspn(extensions, ","); if (end_pos == ext_len && base::strncasecmp(extensions, ext, ext_len) == 0) return mappings[i].mime_type; extensions += end_pos; if (!*extensions) break; extensions += 1; // skip over comma } } return NULL; } bool MimeUtil::GetMimeTypeFromExtension(const FilePath::StringType& ext, string* result) const { // Avoids crash when unable to handle a long file path. See crbug.com/48733. const unsigned kMaxFilePathSize = 65536; if (ext.length() > kMaxFilePathSize) return false; // We implement the same algorithm as Mozilla for mapping a file extension to // a mime type. That is, we first check a hard-coded list (that cannot be // overridden), and then if not found there, we defer to the system registry. // Finally, we scan a secondary hard-coded list to catch types that we can // deduce but that we also want to allow the OS to override. #if defined(OS_WIN) string ext_narrow_str = WideToUTF8(ext); #elif defined(OS_POSIX) const string& ext_narrow_str = ext; #endif const char* mime_type; mime_type = FindMimeType(primary_mappings, arraysize(primary_mappings), ext_narrow_str.c_str()); if (mime_type) { *result = mime_type; return true; } if (GetPlatformMimeTypeFromExtension(ext, result)) return true; mime_type = FindMimeType(secondary_mappings, arraysize(secondary_mappings), ext_narrow_str.c_str()); if (mime_type) { *result = mime_type; return true; } return false; } bool MimeUtil::GetMimeTypeFromFile(const FilePath& file_path, string* result) const { FilePath::StringType file_name_str = file_path.Extension(); if (file_name_str.empty()) return false; return GetMimeTypeFromExtension(file_name_str.substr(1), result); } // From WebKit's WebCore/platform/MIMETypeRegistry.cpp: static const char* const supported_image_types[] = { "image/jpeg", "image/pjpeg", "image/jpg", "image/webp", "image/png", "image/gif", "image/bmp", "image/x-icon", // ico "image/x-xbitmap" // xbm }; // A list of media types: 
// http://en.wikipedia.org/wiki/Internet_media_type
// A comprehensive mime type list: http://plugindoc.mozdev.org/winmime.php
//
// The MPEG-4 and MP3 entries below are compiled in only when
// GOOGLE_CHROME_BUILD or USE_PROPRIETARY_CODECS is defined.
static const char* const supported_media_types[] = {
  // Ogg.
  "video/ogg",
  "audio/ogg",
  "application/ogg",
  // WebM.
  "video/webm",
  "audio/webm",
  // WAV.
  "audio/wav",
  "audio/x-wav",

#if defined(GOOGLE_CHROME_BUILD) || defined(USE_PROPRIETARY_CODECS)
  // MPEG-4.
  "video/mp4",
  "video/x-m4v",
  "audio/mp4",
  "audio/x-m4a",

  // MP3.
  "audio/mp3",
  "audio/x-mp3",
  "audio/mpeg",
#endif
};

// List of supported codecs when passed in with <source type="...">.
//
// Refer to http://wiki.whatwg.org/wiki/Video_type_parameters#Browser_Support
// for more information.
//
// "avc1" and "mp4a" are present only under the same build flags that guard
// the MPEG-4/MP3 mime types above.
static const char* const supported_media_codecs[] = {
#if defined(GOOGLE_CHROME_BUILD) || defined(USE_PROPRIETARY_CODECS)
  "avc1",
  "mp4a",
#endif
  "theora",
  "vorbis",
  "vp8",
  "1"  // PCM for WAV.
};

// Note: does not include javascript types list (see supported_javascript_types)
static const char* const supported_non_image_types[] = {
  "text/cache-manifest",
  "text/html",
  "text/xml",
  "text/xsl",
  "text/plain",
  // Many users complained about css files served for
  // download instead of displaying in the browser:
  // http://code.google.com/p/chromium/issues/detail?id=7192
  // So, by including "text/css" into this list we choose Firefox
  // behavior - css files will be displayed:
  "text/css",
  "text/vnd.chromium.ftp-dir",
  "text/",
  "image/svg+xml",  // SVG is text-based XML, even though it has an image/ type
  "application/xml",
  "application/xhtml+xml",
  "application/rss+xml",
  "application/atom+xml",
  "application/json",
  "application/x-x509-user-cert",
  "multipart/x-mixed-replace"
  // Note: ADDING a new type here will probably render it AS HTML. This can
  // result in cross site scripting.
};
// Compile-time guard: the list above must be changed deliberately, because
// the expected element count has to be updated in the same edit.
COMPILE_ASSERT(arraysize(supported_non_image_types) == 16,
               supported_non_images_types_must_equal_16);

// Mozilla 1.8 and WinIE 7 both accept text/javascript and text/ecmascript.
// Mozilla 1.8 accepts application/javascript, application/ecmascript, and // application/x-javascript, but WinIE 7 doesn't. // WinIE 7 accepts text/javascript1.1 - text/javascript1.3, text/jscript, and // text/livescript, but Mozilla 1.8 doesn't. // Mozilla 1.8 allows leading and trailing whitespace, but WinIE 7 doesn't. // Mozilla 1.8 and WinIE 7 both accept the empty string, but neither accept a // whitespace-only string. // We want to accept all the values that either of these browsers accept, but // not other values. static const char* const supported_javascript_types[] = { "text/javascript", "text/ecmascript", "application/javascript", "application/ecmascript", "application/x-javascript", "text/javascript1.1", "text/javascript1.2", "text/javascript1.3", "text/jscript", "text/livescript" }; static const char* const view_source_types[] = { "text/xml", "text/xsl", "application/xml", "application/rss+xml", "application/atom+xml", "image/svg+xml" }; struct MediaFormatStrict { const char* mime_type; const char* codecs_list; }; static const MediaFormatStrict format_codec_mappings[] = { { "video/webm", "vorbis,vp8,vp8.0" }, { "audio/webm", "vorbis" }, { "audio/wav", "1" } }; void MimeUtil::InitializeMimeTypeMaps() { for (size_t i = 0; i < arraysize(supported_image_types); ++i) image_map_.insert(supported_image_types[i]); // Initialize the supported non-image types. for (size_t i = 0; i < arraysize(supported_non_image_types); ++i) non_image_map_.insert(supported_non_image_types[i]); for (size_t i = 0; i < arraysize(supported_javascript_types); ++i) non_image_map_.insert(supported_javascript_types[i]); for (size_t i = 0; i < arraysize(supported_media_types); ++i) non_image_map_.insert(supported_media_types[i]); // Initialize the supported media types. 
for (size_t i = 0; i < arraysize(supported_media_types); ++i) media_map_.insert(supported_media_types[i]); for (size_t i = 0; i < arraysize(supported_javascript_types); ++i) javascript_map_.insert(supported_javascript_types[i]); for (size_t i = 0; i < arraysize(view_source_types); ++i) view_source_map_.insert(view_source_types[i]); for (size_t i = 0; i < arraysize(supported_media_codecs); ++i) codecs_map_.insert(supported_media_codecs[i]); // Initialize the strict supported media types. for (size_t i = 0; i < arraysize(format_codec_mappings); ++i) { std::vector<std::string> mime_type_codecs; ParseCodecString(format_codec_mappings[i].codecs_list, &mime_type_codecs, false); MimeMappings codecs; for (size_t j = 0; j < mime_type_codecs.size(); ++j) codecs.insert(mime_type_codecs[j]); strict_format_map_[format_codec_mappings[i].mime_type] = codecs; } } bool MimeUtil::IsSupportedImageMimeType(const char* mime_type) const { return image_map_.find(mime_type) != image_map_.end(); } bool MimeUtil::IsSupportedMediaMimeType(const char* mime_type) const { return media_map_.find(mime_type) != media_map_.end(); } bool MimeUtil::IsSupportedNonImageMimeType(const char* mime_type) const { return non_image_map_.find(mime_type) != non_image_map_.end(); } bool MimeUtil::IsSupportedJavascriptMimeType(const char* mime_type) const { return javascript_map_.find(mime_type) != javascript_map_.end(); } bool MimeUtil::IsViewSourceMimeType(const char* mime_type) const { return view_source_map_.find(mime_type) != view_source_map_.end(); } // Mirrors WebViewImpl::CanShowMIMEType() bool MimeUtil::IsSupportedMimeType(const std::string& mime_type) const { return (mime_type.compare(0, 6, "image/") == 0 && IsSupportedImageMimeType(mime_type.c_str())) || IsSupportedNonImageMimeType(mime_type.c_str()); } bool MimeUtil::MatchesMimeType(const std::string &mime_type_pattern, const std::string &mime_type) const { // verify caller is passing lowercase DCHECK_EQ(StringToLowerASCII(mime_type_pattern), 
mime_type_pattern); DCHECK_EQ(StringToLowerASCII(mime_type), mime_type); // This comparison handles absolute maching and also basic // wildcards. The plugin mime types could be: // application/x-foo // application/* // application/*+xml // * if (mime_type_pattern.empty()) return false; const std::string::size_type star = mime_type_pattern.find('*'); if (star == std::string::npos) return mime_type_pattern == mime_type; // Test length to prevent overlap between |left| and |right|. if (mime_type.length() < mime_type_pattern.length() - 1) return false; const std::string left(mime_type_pattern.substr(0, star)); const std::string right(mime_type_pattern.substr(star + 1)); if (mime_type.find(left) != 0) return false; if (!right.empty() && mime_type.rfind(right) != mime_type.length() - right.length()) return false; return true; } bool MimeUtil::AreSupportedMediaCodecs( const std::vector<std::string>& codecs) const { for (size_t i = 0; i < codecs.size(); ++i) { if (codecs_map_.find(codecs[i]) == codecs_map_.end()) { return false; } } return true; } void MimeUtil::ParseCodecString(const std::string& codecs, std::vector<std::string>* codecs_out, bool strip) { std::string no_quote_codecs; TrimString(codecs, "\"", &no_quote_codecs); base::SplitString(no_quote_codecs, ',', codecs_out); if (!strip) return; // Strip everything past the first '.' 
for (std::vector<std::string>::iterator it = codecs_out->begin(); it != codecs_out->end(); ++it) { size_t found = it->find_first_of('.'); if (found != std::string::npos) it->resize(found); } } bool MimeUtil::IsStrictMediaMimeType(const std::string& mime_type) const { if (strict_format_map_.find(mime_type) == strict_format_map_.end()) return false; return true; } bool MimeUtil::IsSupportedStrictMediaMimeType(const std::string& mime_type, const std::vector<std::string>& codecs) const { StrictMappings::const_iterator it = strict_format_map_.find(mime_type); if (it == strict_format_map_.end()) return false; const MimeMappings strict_codecs_map = it->second; for (size_t i = 0; i < codecs.size(); ++i) { if (strict_codecs_map.find(codecs[i]) == strict_codecs_map.end()) { return false; } } return true; } //---------------------------------------------------------------------------- // Wrappers for the singleton //---------------------------------------------------------------------------- bool GetMimeTypeFromExtension(const FilePath::StringType& ext, std::string* mime_type) { return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type); } bool GetMimeTypeFromFile(const FilePath& file_path, std::string* mime_type) { return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type); } bool GetPreferredExtensionForMimeType(const std::string& mime_type, FilePath::StringType* extension) { return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type, extension); } bool IsSupportedImageMimeType(const char* mime_type) { return g_mime_util.Get().IsSupportedImageMimeType(mime_type); } bool IsSupportedMediaMimeType(const char* mime_type) { return g_mime_util.Get().IsSupportedMediaMimeType(mime_type); } bool IsSupportedNonImageMimeType(const char* mime_type) { return g_mime_util.Get().IsSupportedNonImageMimeType(mime_type); } bool IsSupportedJavascriptMimeType(const char* mime_type) { return g_mime_util.Get().IsSupportedJavascriptMimeType(mime_type); } bool 
IsViewSourceMimeType(const char* mime_type) { return g_mime_util.Get().IsViewSourceMimeType(mime_type); } bool IsSupportedMimeType(const std::string& mime_type) { return g_mime_util.Get().IsSupportedMimeType(mime_type); } bool MatchesMimeType(const std::string &mime_type_pattern, const std::string &mime_type) { return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type); } bool AreSupportedMediaCodecs(const std::vector<std::string>& codecs) { return g_mime_util.Get().AreSupportedMediaCodecs(codecs); } bool IsStrictMediaMimeType(const std::string& mime_type) { return g_mime_util.Get().IsStrictMediaMimeType(mime_type); } bool IsSupportedStrictMediaMimeType(const std::string& mime_type, const std::vector<std::string>& codecs) { return g_mime_util.Get().IsSupportedStrictMediaMimeType(mime_type, codecs); } void ParseCodecString(const std::string& codecs, std::vector<std::string>* codecs_out, const bool strip) { g_mime_util.Get().ParseCodecString(codecs, codecs_out, strip); } namespace { // From http://www.w3schools.com/media/media_mimeref.asp and // http://plugindoc.mozdev.org/winmime.php static const char* kStandardImageTypes[] = { "image/bmp", "image/cis-cod", "image/gif", "image/ief", "image/jpeg", "image/webp", "image/pict", "image/pipeg", "image/png", "image/svg+xml", "image/tiff", "image/x-cmu-raster", "image/x-cmx", "image/x-icon", "image/x-portable-anymap", "image/x-portable-bitmap", "image/x-portable-graymap", "image/x-portable-pixmap", "image/x-rgb", "image/x-xbitmap", "image/x-xpixmap", "image/x-xwindowdump" }; static const char* kStandardAudioTypes[] = { "audio/aac", "audio/aiff", "audio/amr", "audio/basic", "audio/midi", "audio/mp3", "audio/mp4", "audio/mpeg", "audio/mpeg3", "audio/ogg", "audio/vorbis", "audio/wav", "audio/webm", "audio/x-m4a", "audio/x-ms-wma", "audio/vnd.rn-realaudio", "audio/vnd.wave" }; static const char* kStandardVideoTypes[] = { "video/avi", "video/divx", "video/flc", "video/mp4", "video/mpeg", "video/ogg", 
"video/quicktime", "video/sd-video", "video/webm", "video/x-dv", "video/x-m4v", "video/x-mpeg", "video/x-ms-asf", "video/x-ms-wmv" }; void GetExtensionsFromHardCodedMappings( const MimeInfo* mappings, size_t mappings_len, const std::string& leading_mime_type, base::hash_set<FilePath::StringType>* extensions) { FilePath::StringType extension; for (size_t i = 0; i < mappings_len; ++i) { if (StartsWithASCII(mappings[i].mime_type, leading_mime_type, false)) { std::vector<string> this_extensions; base::SplitStringUsingSubstr(mappings[i].extensions, ",", &this_extensions); for (size_t j = 0; j < this_extensions.size(); ++j) { #if defined(OS_WIN) FilePath::StringType extension(UTF8ToWide(this_extensions[j])); #else FilePath::StringType extension(this_extensions[j]); #endif extensions->insert(extension); } } } } void GetExtensionsHelper( const char** standard_types, size_t standard_types_len, const std::string& leading_mime_type, base::hash_set<FilePath::StringType>* extensions) { FilePath::StringType extension; for (size_t i = 0; i < standard_types_len; ++i) { if (GetPreferredExtensionForMimeType(standard_types[i], &extension)) extensions->insert(extension); } // Also look up the extensions from hard-coded mappings in case that some // supported extensions are not registered in the system registry, like ogg. GetExtensionsFromHardCodedMappings(primary_mappings, arraysize(primary_mappings), leading_mime_type, extensions); GetExtensionsFromHardCodedMappings(secondary_mappings, arraysize(secondary_mappings), leading_mime_type, extensions); } // Note that the elements in the source set will be appended to the target // vector. 
template<class T> void HashSetToVector(base::hash_set<T>* source, std::vector<T>* target) { size_t old_target_size = target->size(); target->resize(old_target_size + source->size()); size_t i = 0; for (typename base::hash_set<T>::iterator iter = source->begin(); iter != source->end(); ++iter, ++i) { target->at(old_target_size + i) = *iter; } } } void GetImageExtensions(std::vector<FilePath::StringType>* extensions) { base::hash_set<FilePath::StringType> unique_extensions; GetExtensionsHelper(kStandardImageTypes, arraysize(kStandardImageTypes), "image/", &unique_extensions); HashSetToVector(&unique_extensions, extensions); } void GetAudioExtensions(std::vector<FilePath::StringType>* extensions) { base::hash_set<FilePath::StringType> unique_extensions; GetExtensionsHelper(kStandardAudioTypes, arraysize(kStandardAudioTypes), "audio/", &unique_extensions); HashSetToVector(&unique_extensions, extensions); } void GetVideoExtensions(std::vector<FilePath::StringType>* extensions) { base::hash_set<FilePath::StringType> unique_extensions; GetExtensionsHelper(kStandardVideoTypes, arraysize(kStandardVideoTypes), "video/", &unique_extensions); HashSetToVector(&unique_extensions, extensions); } void GetExtensionsForMimeType(const std::string& mime_type, std::vector<FilePath::StringType>* extensions) { base::hash_set<FilePath::StringType> unique_extensions; FilePath::StringType extension; if (GetPreferredExtensionForMimeType(mime_type, &extension)) unique_extensions.insert(extension); // Also look up the extensions from hard-coded mappings in case that some // supported extensions are not registered in the system registry, like ogg. GetExtensionsFromHardCodedMappings(primary_mappings, arraysize(primary_mappings), mime_type, &unique_extensions); GetExtensionsFromHardCodedMappings(secondary_mappings, arraysize(secondary_mappings), mime_type, &unique_extensions); HashSetToVector(&unique_extensions, extensions); } } // namespace net