@@ -754,6 +754,26 @@ def is_valid(self) -> bool:
754
754
>>> Language.get('x-heptapod').is_valid()
755
755
True
756
756
757
+ A language tag with multiple extlangs will parse, but is not valid.
758
+ The only allowed example is 'zh-min-nan', which normalizes to the
759
+ language 'nan'.
760
+
761
+ >>> Language.get('zh-min-nan').is_valid()
762
+ True
763
+ >>> Language.get('sgn-ase-bfi').is_valid()
764
+ False
765
+
766
+ These examples check that tags with duplicate subtags are not valid:
767
+
768
+ >>> Language.get('de-1901').is_valid()
769
+ True
770
+ >>> Language.get('de-1901-1901').is_valid()
771
+ False
772
+ >>> Language.get('en-a-bbb-c-ddd').is_valid()
773
+ True
774
+ >>> Language.get('en-a-bbb-a-ddd').is_valid()
775
+ False
776
+
757
777
Of course, you should be prepared to catch a failure to parse the
758
778
language code at all:
759
779
@@ -762,13 +782,31 @@ def is_valid(self) -> bool:
762
782
...
763
783
langcodes.tag_parser.LanguageTagError: Expected a language code, got 'c'
764
784
"""
785
+ if self .extlangs is not None :
786
+ # An erratum to BCP 47 says that tags with more than one extlang are
787
+ # invalid.
788
+ if len (self .extlangs ) > 1 :
789
+ return False
790
+
765
791
subtags = [self .language , self .script , self .territory ]
792
+ checked_subtags = []
766
793
if self .variants is not None :
767
794
subtags .extend (self .variants )
768
795
for subtag in subtags :
769
796
if subtag is not None :
797
+ checked_subtags .append (subtag )
770
798
if not subtag .startswith ('x-' ) and not VALIDITY .match (subtag ):
771
799
return False
800
+
801
+ # We check extensions for validity by ensuring that there aren't
802
+ # two extensions introduced by the same letter. For example, you can't
803
+ # have two 'u-' extensions.
804
+ if self .extensions :
805
+ checked_subtags .extend (
806
+ [extension [:2 ] for extension in self .extensions ]
807
+ )
808
+ if len (set (checked_subtags )) != len (checked_subtags ):
809
+ return False
772
810
return True
773
811
774
812
def has_name_data (self ) -> bool :
@@ -1555,8 +1593,8 @@ def standardize_tag(tag: Union[str, Language], macro: bool = False) -> str:
1555
1593
def tag_is_valid (tag : Union [str , Language ]) -> bool :
1556
1594
"""
1557
1595
Determines whether a string is a valid language tag. This is similar to
1558
- Language.get(tag).is_valid(), but can return False in the case where the
1559
- tag doesn't parse.
1596
+ Language.get(tag).is_valid(), but can return False in the case where
1597
+ the tag doesn't parse.
1560
1598
1561
1599
>>> tag_is_valid('ja')
1562
1600
True
0 commit comments