(* maintainFamilydata
   Delayed definition: every evaluation runs the compound expression below.
   It takes no arguments and works entirely through global symbols
   (no Module or Block is used), so filesave0, filesave, url, url1, letters,
   fl, fnumbers, fnumbers1, files, test, text, head, fname, frest, fauthor,
   fref and fcname are all assigned globally. It reads the global dirwork.

   Flow:
   1. filesave0 is dirwork/familiesdata.m. If FileType reports that it is
      not a File, the database is created (steps 2 to 6); otherwise step 7.
   2. For each letter a to z: the cache file dirwork/data/families/letter.m
      is loaded when present; if it defines fnumbers the letter is skipped,
      otherwise the file is deleted and the page url followed by the letter
      is fetched with URLFetch. fnumbers is extracted with StringCases and
      checked to be a List whose entries are digit-only strings; on a failed
      check an error is printed and the letter is skipped without saving.
   3. The per-letter files are read back with Get and merged with Union.
      NOTE(review): this step reads every letter file, including letters
      that were skipped in step 2 after a validation error and therefore
      may have no file. Confirm the intended handling.
   4. SetDirectory switches to dirwork/data/families and FileNames lists it;
      extensions are stripped and Complement yields the fnumbers with no
      file yet. Each of those is downloaded with URLSave from url1 followed
      by the fnumber into fnumber.htm. The working directory is not reset
      anywhere in this block.
   5. Each fnumber.htm is imported as Text and parsed with StringCases and
      StringReplace. Results are stored as familyName[fnumber] and, keyed by
      the family name, familyAuthor, familyReference and familyCommonName.
      A page whose head or name does not match exactly one string only
      prints an error.
   6. familyList is familyName mapped over fnumbers; the five symbols are
      written to filesave0 with Save.
   7. If filesave0 already exists it is loaded with Get and the length of
      familyList is printed.
   8. In both cases the hard-coded familyCommonName entries and the one
      familyAuthor entry at the end are assigned afterwards, overriding any
      loaded or parsed values for those keys.

   NOTE(review): several RegularExpression string literals below look
   truncated, as if markup inside the patterns was lost (the fnumbers
   pattern in step 2 and the patterns spanning several lines in step 5).
   Verify them against the original file before running; they are left
   untouched here. *)
(* Creating or reading database for families *) maintainFamilydata := ( filesave0 = ToFileName[{dirwork}, "familiesdata.m"]; If[FileType[filesave0] =!= File, url = "https://florabase.dpaw.wa.gov.au/browse/family/"; letters = CharacterRange["a", "z"]; Print["Creating database for families. Finding f-numbers ..."]; Do[ filesave = ToFileName[{dirwork, "data", "families"}, letter <> ".m"]; If[FileType[filesave] === File, Clear[fnumbers]; Get[filesave]; If[ValueQ[fnumbers], Continue[], DeleteFile[filesave]]]; fl = URLFetch[url <> letter]; fnumbers = StringCases[fl, RegularExpression[" "$1"]; Print[letter, " -> ", fnumbers]; If[Head[fnumbers] =!= List, Print["Error: Head[fnumbers]=!=List: ", letter, " - ", fnumbers]; Continue[]]; test = StringMatchQ[#, RegularExpression["\\d+"]] & /@ fnumbers; If[! (And @@ test), Print["Error: !(And@@test): ", letter, " - ", test]; Continue[]]; Print[" - saving fnumbers to the file ", filesave]; Save[filesave, fnumbers], {letter, letters}]; fnumbers = Union @@ Table[ filesave = ToFileName[{dirwork, "data", "families"}, letter <> ".m"]; Clear[fnumbers]; Get[filesave]; fnumbers, {letter, letters}]; Print["Total number of families on the web page ", url, ": ", Length[fnumbers]]; SetDirectory[ToFileName[{dirwork, "data", "families"}]]; files = FileNames[]; files = StringReplace[#, RegularExpression["[.].+$"] -> ""] & /@ files; fnumbers1 = Complement[fnumbers, files]; url1 = "https://florabase.dpaw.wa.gov.au/browse/profile/"; If[fnumbers1 =!= {}, Print["Total number of families to retrieve from the web page ", url1, ": ", Length[fnumbers1]]; Print["Fetching web pages for fnumbers ..."]; Do[ filesave = ToFileName[{dirwork, "data", "families"}, fnumber <> ".htm"]; URLSave[url1 <> fnumber, filesave]; Print[fnumber, " -> ", filesave], {fnumber, fnumbers1}]; ]; Print["Extracting information from the web pages ..."]; ClearAll[familyName, familyAuthor, familyReference, familyCommonName, familyList]; Do[ filesave = ToFileName[{dirwork, "data", 
"families"}, fnumber <> ".htm"]; text = Import[filesave, "Text"]; (* Head *) head = StringCases[text, RegularExpression[ "(?i)(?m)(?s)

\\s*(.*?)\\s*

"] -> "$1"]; Print[fnumber, " -> ", head]; If[!MatchQ[head, {_String}], Print["Error: head === ", head], head = head[[1]]; fname = StringCases[head, RegularExpression[ "(?i)(?m)(?s)\\s*(.*?)\\s*"] -> "$1"]; If[!MatchQ[fname, {_String}], Print["Error: fname === ", fname], fname = fname[[1]]; Print["*** Name: ", fname]; familyName[fnumber] = fname; frest = StringCases[head, RegularExpression[ "(?i)(?m)(?s)\\s*(.*?)\\s*\\s*(.*)"] -> "$2"]; If[frest =!= {}, frest = frest[[1]]; fauthor = StringReplace[frest, RegularExpression["(?i)(?m)(?s)\\s*
.*"] -> ""]; Print[" -- Author: ", fauthor]; familyAuthor[fname] = fauthor; fref = StringCases[frest, RegularExpression[ "(?i)(?m)(?s)\\s*(.*?)\\s*\\s*(.*)"] -> "$1"]; If[fref =!= {}, fref = fref[[1]]; Print[" -- Reference: ", fref]; familyReference[fname] = fref; ]; ]; fcname = StringCases[text, RegularExpression[ "(?i)(?m)(?s)

Common name(\\s|[.])*(.*?)(\\s|[.])*

"] -> "$2"]; If[fcname =!= {}, fcname = fcname[[1]]; Print[" -- Common name: ", fcname]; familyCommonName[fname] = fcname; ]; ]; ]; , {fnumber, fnumbers}]; familyList = familyName /@ fnumbers; Save[filesave0, {familyName, familyAuthor, familyReference, familyCommonName, familyList}], Print["Family Data: reading definitions: familyName, familyAuthor, familyReference, familyCommonName, familyList ..."]; Get[filesave0]; Print[" - total number of families in database: ", Length[familyList]]; ]; (* If[FileType[filesave0] =!= File, *) (* Print["Families that are not in the database: ", Complement[plantfamilies, familyList]]; familylist1 = Select[familyList, Head[familyCommonName[#]] === String &]; Print["Families in the database without common names: ", Complement[familyList, familylist1]]; Print["Families without common names: ", Complement[plantfamilies, familylist1]]; *) (* Missing families *) familyCommonName["Caricaceae"] = "Papaya Family"; familyCommonName["Cistaceae"] = "Rock Rose Family"; familyCommonName["Cleomaceae"] = "Cleome Family"; (*familyCommonName["Cynomoriaceae"] = "Desert Thumb Family";*) (*familyCommonName["Didiereaceae"] = "XXX Family";*) familyCommonName["Ephedraceae"] = "Horsetail Family"; familyCommonName["Fabaceae"] = "Bean Family"; familyCommonName["Liliaceae"] = "Lily Family"; familyCommonName["Moringaceae"] = "Drumstick Tree Family"; familyCommonName["Malpighiaceae"] = "Barbados cherry Family"; familyAuthor["Malpighiaceae"] = "A.Juss."; (*familyCommonName["Muntingiaceae"] = "XXX Family";*) (*familyCommonName["Neuradaceae"] = "XXX Family";*) (*familyCommonName["Phyllanthaceae"] = "XXX Family";*) familyCommonName["Ruppiaceae"] = "Widgeonweeds Family"; (*familyCommonName["Salvadoraceae"] = "XXX Family";*) familyCommonName["Heliotropiaceae"] = "Heliotrope Family"; familyCommonName["Polygonaceae"] = "Buckwheat Family"; (* it is read with error *) familyCommonName["Rutaceae"] = "Rue or Citrus Family"; )