(* Creating a web page ("lookout") for looking up plant names, and importing
   the per-plant records that the page links to. *)

(* Replacement rules applied to every family name read from a database
   record before it is stored in plantfamily[...]. *)
unitefamilies = {
  "Compositae" -> "Asteraceae",
  "Asparagaceae" -> "Liliaceae",
  "Leguminosae" -> "Fabaceae"};

(* maintainLookout
   Walks over the global list `data`, whose elements have the form
   {name, id1, ...}, and for every entry:
     - decides from the shape of the first id which database it belongs to,
     - downloads the record into dirdata unless a cached file already exists,
     - parses the cached file with readdata1 (XML) or readdata2 (HTML).
   Afterwards it writes "lookout.htm" into dirwork with one search link per
   entry and installs fallback definitions for plants without a record.

   Reads globals : dirwork, dirdata, data.
   Sets globals  : name, entrydata, id, url, url1, filedata, resp, label,
                   names, text, filelook, plantdatali[...], plantdataurl[...].
   `name` is deliberately left global: readdata1 and readdata2 store their
   results under the current value of `name`.

   NOTE(review): all HTML fragments below were reconstructed; the markup had
   been stripped from the string literals, leaving unbalanced quotes.
   Confirm tag names and case against the pristine source. *)
maintainLookout := (
  SetDirectory[dirwork];
  filelook = "lookout.htm";
  names = Function[entry, (
      name = entry[[1]];
      (* The per-entry tail used to be assigned to `data` itself, which
         overwrote the list being mapped over and broke a second run. *)
      entrydata = Drop[entry, 1];
      plantdatali[name] = "";
      If[entrydata === {},
        Print["Warning: empty data for entry: ", name];
        label = " EMPTY",
        (* else: at least one id is present *)
        label = "- NO DATA";
        id = entrydata[[1]] // ToLowerCase;
        (* Print["ID: ", id]; *)

        (* Purely numeric or very long id: handled as a catalogueoflife.org record. *)
        If[StringMatchQ[id, RegularExpression["[0-9]+"]] || StringLength[id] > 30,
          url = "http://www.catalogueoflife.org/col/webservice?id=" <> id <> "&response=full";
          url1 = "http://www.catalogueoflife.org/col/details/species/id/" <> id;
          plantdataurl[name] = url1;
          plantdatali[name] = "<li><a href=\"" <> url1 <> "\">" <> name <> "</a> from www.catalogueoflife.org";
          label = "+ catalogueoflife.org: " <> id;
          filedata = ToFileName[dirdata, id <> ".xml"];
          If[FileType[filedata] =!= File,
            Print["Getting ", url];
            resp = URLSave[url, filedata, {"Headers", "StatusCode"}];
            (* Anything other than {headers, 200} counts as a failure; this
               also covers URLSave returning a non-list such as $Failed. *)
            If[! MatchQ[resp, {_, 200}],
              Print["Warning: URL = ", url, ": bad response: ", resp];
              If[FileType[filedata] === File,
                Print["File ", filedata, " will be deleted!"];
                Print[Import[filedata, "Text"]];
                DeleteFile[filedata];
              ];
            ];
            (* A freshly downloaded file below 1000 bytes is discarded. *)
            If[FileType[filedata] === File,
              If[FileByteCount[filedata] < 1000,
                Print["Warning: URL = ", url, ": too short output: ", Import[filedata, "Text"]];
                Print["File ", filedata, " will be deleted!"];
                DeleteFile[filedata];
              ];
            ];
          ];
          If[FileType[filedata] === File,
            readdata1[filedata],
            Print["Warning: file ", filedata, " not found."];]
        ];

        (* Short id of the form letters-digits: handled as a theplantlist.org record. *)
        If[StringMatchQ[id, RegularExpression["[a-z]+[-][0-9]+"]] && StringLength[id] < 30,
          url = "http://www.theplantlist.org/tpl1.1/record/" <> id;
          url1 = url;
          plantdataurl[name] = url1;
          plantdatali[name] = "<li><a href=\"" <> url1 <> "\">" <> name <> "</a> from www.theplantlist.org";
          label = "* theplantlist.org: " <> id;
          filedata = ToFileName[dirdata, id <> ".htm"];
          If[FileType[filedata] =!= File,
            Print["Getting ", url];
            resp = URLSave[url, filedata, {"Headers", "StatusCode"}];
            If[! MatchQ[resp, {_, 200}],
              Print["Warning: URL = ", url, ": bad response: ", resp];
              If[FileType[filedata] === File,
                Print["File ", filedata, " will be deleted!"];
                Print[Import[filedata, "Text"]];
                DeleteFile[filedata];
              ];
            ];
          ];
          If[FileType[filedata] === File,
            readdata2[filedata],
            Print["Warning: file ", filedata, " not found."];]
        ];
      ];
      {name, label})] /@ data;

  (* One search link per entry, followed by the status label computed above.
     NOTE(review): the header, footer and line-break tags are reconstructed;
     only the text "Lookout" and the TARGET attribute survived. *)
  text = {
    "<HTML>",
    "<TITLE>Lookout</TITLE>",
    Table[
      url = "http://www.catalogueoflife.org/col/search/all/key/" <> StringReplace[name[[1]], " " -> "+"];
      "<A HREF=\"" <> url <> "\" TARGET=\"_blank\">" <> name[[1]] <> "</A>" <> name[[2]] <> "<BR>",
      {name, names}],
    "</HTML>"};
  Export[filelook, Flatten[text], "Lines"];
  Print["Lookout web page " <> ToFileName[dirwork, filelook] <> " was created."];

  (* Not found in databases: general fallbacks used when no record-specific
     value was stored. The genus is the text before the first space. *)
  plantgenus[name_String] := StringReplace[name, RegularExpression["(.*?) .*"] -> "$1"];
  plantcnames[name_String] = {};
);

(* readdata1[file]
   Parses a cached catalogueoflife.org XML record and stores the extracted
   values under the current global `name`: plantname, plantsynonyms,
   plantclass, plantfamily, plantgenus and plantcnames.
   Also leaves the intermediate globals plantsynonyms0/1/2, planttaxons and
   plantclass0 set, as the original code did.
   Relies on the helpers stringtrim and noPrint, which are defined elsewhere.
   Returns Null; prints a message when the record does not have the
   expected structure. *)
readdata1[file_String] := Module[
  {dataxml, results, reps, msyn, cls, taxons, name0, cnames, shorten},

  (* Keeps the first two words of a name (with an optional \[Times] after the
     first word) and drops everything after them.
     NOTE(review): in the damaged source the synonym variant of this regex
     was split by a line break after ".*?) "; it is assumed here to be
     identical to the intact variant used for name0 - confirm. *)
  shorten = RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1";

  dataxml = Import[file, {"XML"}];
  results = Cases[dataxml, XMLElement["result", _, a_] :> a, Infinity];
  If[Length[results] != 1, Print["error reading file ", file, " : Length[results] != 1"]];
  (* Without any <result> element there is nothing to parse. *)
  If[results === {}, Return[]];
  dataxml = results[[1]];

  (* Flattening rules: unwrap singleton lists, join runs of strings with
     spaces, and turn simple child elements back into inline markup.
     NOTE(review): the closing-tag part "</" <> x <> ">" is reconstructed. *)
  reps = {
    {a_} :> a,
    {s___String} :> StringJoin[Riffle[{s}, " "]],
    XMLElement[x_String, y_, z_String] :> "<" <> x <> ">" <> z <> "</" <> x <> ">"};

  plantname[name] = StringTrim[Cases[dataxml, XMLElement["name_html", _, a_] :> a, Infinity, 1] //. reps];
  name0 = Cases[dataxml, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps;
  name0 = StringReplace[name0, shorten];
  noPrint["Name: ", plantname // FullForm];

  plantsynonyms0 = Cases[dataxml, XMLElement["synonym", _, a_] :> a, Infinity];
  msyn = Length[plantsynonyms0];
  plantsynonyms1 = (Cases[#, XMLElement["name_html", _, a_] :> a, Infinity, 1] //. reps) & /@ plantsynonyms0;
  plantsynonyms2 = (Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps) & /@ plantsynonyms0;
  noPrint["Synonyms (", msyn, "): ", {plantsynonyms1, plantsynonyms2} // Transpose // TableForm];

  (* Simplification: shortened, de-duplicated synonyms without the accepted name. *)
  plantsynonyms2 = stringtrim /@ plantsynonyms2;
  plantsynonyms[name] = Complement[Union[StringReplace[#, shorten] & /@ plantsynonyms2], {name0}];

  cls = Cases[dataxml, XMLElement["classification", _, a_] :> a, Infinity];
  If[Length[cls] != 1, Print["error reading classification in file ", file, " : Length[cls] != 1"]];
  (* No classification: keep what was stored so far and stop. *)
  If[cls === {}, Return[]];
  cls = cls[[1]];
  taxons = Cases[cls, XMLElement["taxon", _, a_] :> a, Infinity];
  (* List of {taxon name, rank} pairs. *)
  planttaxons = {
      Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps,
      Cases[#, XMLElement["rank", _, a_] :> a, Infinity, 1] //. reps} & /@ taxons;

  plantclass0 = Select[planttaxons, #[[2]] === "Class" &][[1, 1]];
  plantclass[name] = If[plantclass0 === "Gnetopsida",
    "Gymnospermous flowering plants",
    "Flowering plants (angiosperms)"];
  (*plantclass[name] = Switch[plantclass0, "Gnetopsida", "Gymnospermous flowering plants", "Liliopsida", "Monocots", _, "Flowering plants (angiosperms)"];*)
  plantfamily[name] = Select[planttaxons, #[[2]] === "Family" &][[1, 1]] /. unitefamilies;
  plantgenus[name] = Select[planttaxons, #[[2]] === "Genus" &][[1, 1]];
  noPrint[{{"Class", plantclass}, {"Family", plantfamily}, {"Genus", plantgenus}} // TableForm];

  cnames = Cases[dataxml, XMLElement["common_name", _, a_] :> a, Infinity];
  plantcnames[name] = (Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps) & /@ cnames;
];

(* readdata2[file]
   Parses a cached theplantlist.org HTML record and stores the extracted
   values under the current global `name`: plantname, plantsynonyms,
   plantclass, plantfamily and plantgenus.
   Also leaves the intermediate globals plantname0 and plantsynonyms0 set,
   as the original code did.
   Relies on the helper stringtrim, which is defined elsewhere.
   Returns Null; prints a message when the page does not have the expected
   structure. *)
readdata2[file_String] := Module[
  {data, name0, class, family, genus, shorten},

  (* Same shortening rule as in readdata1.
     NOTE(review): the synonym variant was split by a line break in the
     damaged source; assumed identical to the name0 variant - confirm. *)
  shorten = RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1";

  data = Import[file, {"HTML", "Data"}];
  (* NOTE(review): presumably data[[2, 1]] is the accepted name on the
     record page - this depends on the page layout; verify. *)
  plantname0 = data[[2, 1]] // StringTrim;
  plantname[name] = plantname0;
  name0 = StringReplace[plantname0, shorten];
  (* Four-column table rows whose second column reads "Synonym". *)
  plantsynonyms0 = Cases[data, {a_, "Synonym", _, _} :> a, Infinity];
  plantsynonyms[name] = Complement[Union[StringReplace[#, shorten] & /@ plantsynonyms0], {name0}];

  (* The second import must read this function's own argument; the original
     read the global `filedata` here, which only matched `file` when the
     function was called from maintainLookout. *)
  data = Import[file, {"HTML", "Plaintext"}];
  (* The three lines following "The Plant List" are taken as class, family
     and genus. *)
  data = StringCases[data, RegularExpression["The Plant List[ ]*\n(.*?)\n(.*?)\n(.*)"] -> {"$1", "$2", "$3"}];
  If[Length[data] != 1, Print["Error reading file ", file, " : Length[data] != 1"]];
  (* No match at all: nothing to destructure. *)
  If[data === {}, Return[]];
  {class, family, genus} = stringtrim /@ data[[1]];
  plantclass[name] = If[class === "Angiosperms",
    "Flowering plants (angiosperms)",
    "Gymnospermous flowering plants"];
  plantfamily[name] = family /. unitefamilies;
  plantgenus[name] = genus;
];