Skip to content

Commit

Permalink
Add authority and availability fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Xennis committed Jun 6, 2024
1 parent a3683d4 commit 0872ea7
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Call the attributes, for example
| edition_foreign_languages | `//body/div[@type='edition']//foreign/@xml:lang` |
| edition_language | `//body/div[@type='edition']/@xml:lang` |
| idno | `//teiHeader/fileDesc/publicationStmt/idno` |
| authority | `//teiHeader/fileDesc/publicationStmt/authority` |
| availability | `//teiHeader/fileDesc/publicationStmt/availability` |
| languages | `//teiHeader/profileDesc/langUsage/language` |
| material | `//teiHeader/fileDesc/sourceDesc/msDesc/physDesc/objectDesc//support/material` |
| origin_dates | `//teiHeader/fileDesc/sourceDesc/msDesc/history/origin/origDate` |
Expand Down
25 changes: 24 additions & 1 deletion epidoc/api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

from bs4 import BeautifulSoup

from .body import _Edition, _Head
Expand All @@ -9,6 +11,8 @@ class EpiDoc:

title = None
idno: dict[str, str] = {}
authority: Optional[str] = None
availability: Optional[str] = None
material = None
origin_dates: list[str] = []
origin_place: dict[str, str] = {}
Expand All @@ -26,6 +30,8 @@ def create(
cls,
title,
idno,
authority=None,
availability=None,
material=None,
origin_dates=None,
origin_place=None,
Expand All @@ -42,6 +48,10 @@ def create(
h.title = title
h.idno = idno
h.material = material
if authority is not None:
h.authority = authority
if availability is not None:
h.availability = availability
if origin_dates is not None:
h.origin_dates = origin_dates
if origin_place is not None:
Expand Down Expand Up @@ -78,13 +88,26 @@ def loads(s):
filedesc = teiheader.filedesc
doc.title = filedesc.titlestmt.title.getText()
idnos = {}
for idno in filedesc.publicationstmt.find_all("idno"):
publication_stmt = filedesc.publicationstmt
for idno in publication_stmt.find_all("idno"):
typ = _normalize(idno.attrs.get("type"))
value = _normalize(idno.getText())
if not value:
continue
idnos[typ] = value
doc.idno = idnos
authority = publication_stmt.find("authority")
if authority:
doc.authority = _normalized_get_text(authority)
availability = publication_stmt.find("availability")
if availability:
availability_text = _normalized_get_text(availability)
license = availability.find("ref", type="license")
if license:
license_target = license.attrs.get("target")
if license_target:
availability_text += f" {license_target}"
doc.availability = availability_text

msdesc = filedesc.sourcedesc.msdesc
if msdesc:
Expand Down
2 changes: 1 addition & 1 deletion epidoc/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def _normalize(v: T) -> T:
def _normalized_get_text(raw):
if not raw:
return None
parsed = raw.getText().strip()
parsed = raw.getText().strip().replace("\n", "")
return parsed if parsed else None


Expand Down
16 changes: 16 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
def assert_epi_doc(test, want, actual):
test.assertEqual(want.title, actual.title, msg="title")
test.assertEqual(want.idno, actual.idno, msg="idno")
test.assertEqual(want.authority, actual.authority, msg="authority")
test.assertEqual(want.availability, actual.availability, msg="availability")
test.assertEqual(want.material, actual.material, msg="material")
test.assertEqual(want.origin_dates, actual.origin_dates, msg="origin_dates")
test.assertEqual(want.origin_place, actual.origin_place, msg="origin_place")
Expand All @@ -24,6 +26,8 @@ def assert_epi_doc(test, want, actual):
test.assertEqual(want.commentary, actual.commentary, msg="commentary")
test.assertEqual(want.edition_language, actual.edition_language, msg="edition_language")
test.assertEqual(want.edition_foreign_languages, actual.edition_foreign_languages, msg="edition_foreign_languages")
test.assertEqual(want.reprint_from, actual.reprint_from, msg="reprint_from")
test.assertEqual(want.reprint_in, actual.reprint_in, msg="reprint_in")


class TestLoad(unittest.TestCase):
Expand All @@ -34,13 +38,16 @@ def test_all(self):
EpiDoc.create(
title="P.Heid.Arab. I p. 6-7",
idno={"filename": "pap(23new)", "tm": "106496"},
authority="The Arabic Papyrology Database",
availability="© The Arabic Papyrology Database. This work is licensed under a Creative Commons Attribution 3.0 License. http://creativecommons.org/licenses/by/3.0/",
),
),
(
os.path.join("apis", "yale.apis.0000540000.xml"),
EpiDoc.create(
title="Receipt, Roman CE ii (ca. 162) [BPG]",
idno={"apisid": "yale.apis.0000540000", "controlno": "(cty)54", "hgv": "20671", "tm": "20671"},
authority="APIS",
origin_dates=[{"notafter": "0199", "notbefore": "0100", "text": "Roman CE ii (ca. 162) [BPG]"}],
terms=[{"text": "Receipt"}, {"text": "Papyri"}],
languages={"en": "English", "grc": "Greek"},
Expand All @@ -57,8 +64,11 @@ def test_all(self):
"hgv": "114844",
"tm": "114844",
},
authority="Duke Collaboratory for Classics Computing (DC3)",
availability="© Duke Databank of Documentary Papyri. This work is licensed under a Creative Commons Attribution 3.0 License. http://creativecommons.org/licenses/by/3.0/",
languages={"en": "English", "la": "Latin"},
edition_language="la",
reprint_in=["p.ital;2;38/41"],
),
),
(
Expand All @@ -71,6 +81,8 @@ def test_all(self):
"hgv": "697551",
"tm": "697551",
},
authority="Duke Collaboratory for Classics Computing (DC3)",
availability="© Duke Databank of Documentary Papyri. This work is licensed under a Creative Commons Attribution 3.0 License. http://creativecommons.org/licenses/by/3.0/",
languages={"en": "English", "grc": "Greek"},
edition_language="grc",
),
Expand All @@ -86,6 +98,8 @@ def test_all(self):
"ldab": "5148",
"tm": "26761",
},
authority="Digital Corpus of Literary Papyri",
availability="© Digital Corpus of Literary Papyri. This work is licensed under a Creative Commons Attribution 3.0 License. http://creativecommons.org/licenses/by/3.0/",
material="papyrus",
origin_dates=[
{
Expand Down Expand Up @@ -132,6 +146,8 @@ def test_all(self):
"ldab": "135858",
"tm": "135858",
},
authority="Digital Corpus of Literary Papyri",
availability="© Digital Corpus of Literary Papyri. This work is licensed under a Creative Commons Attribution 3.0 License. http://creativecommons.org/licenses/by/3.0/",
material="parchment",
origin_dates=[
{
Expand Down

0 comments on commit 0872ea7

Please sign in to comment.