(* Creating a web page for lookout of plant names *)
(* Replacement rules that map alternative family names onto the single name
   used by this project. They are applied with /. to the family strings read
   from the downloaded database records. Each pair is {name as found, name to use}. *)
unitefamilies = Rule @@@ {
  {"Compositae", "Asteraceae"},
  {"Asparagaceae", "Liliaceae"},
  {"Leguminosae", "Fabaceae"}};
(* maintainLookout: evaluating this symbol rebuilds the "lookout" web page and
   fills the per-plant lookup tables.

   What the body does, in order:
   - makes dirwork the current directory and sets filelook to "lookout.htm";
   - maps a pure function over the global list `data`. For every entry the first
     element becomes the global `name`, the remaining elements are stored back
     into the global `data` (the list being mapped was already evaluated, so the
     iteration itself is unaffected), and a {name, label} pair is produced;
   - an entry without further elements gets the label " EMPTY" and a warning;
     otherwise the label defaults to "- NO DATA" and the lower-cased first data
     element is taken as the database id;
   - an id that is all digits or longer than 30 characters is looked up at
     catalogueoflife.org: the record is saved as id.xml in dirdata unless that
     file already exists, a download with a status other than 200 or a size
     below 1000 bytes is deleted again, and an existing file is parsed by
     readdata1;
   - an id of the form letters-hyphen-digits shorter than 30 characters is
     looked up at theplantlist.org: the record is saved as id.htm in dirdata,
     deleted again on a status other than 200, and parsed by readdata2;
   - the page text is assembled from `names`, written to filelook with
     Export[..., "Lines"], and a confirmation is printed;
   - finally, fallback definitions are installed for plants not found in the
     databases: plantgenus takes the text before the first space of the name,
     plantcnames is the empty list.

   Globals read: dirwork, dirdata, data.
   Globals written: filelook, names, name, data, label, id, url, url1, filedata,
   resp, text, plantdatali, plantdataurl, plus whatever readdata1/readdata2 set.

   NOTE(review): the string literals assigned to plantdatali and the strings in
   the `text` page template have unbalanced quotes and look as if their HTML
   markup has been lost - restore them from the original file before running. *)
maintainLookout := (
SetDirectory[dirwork];
filelook = "lookout.htm";
names = {
(* pure function applied to each entry of data; yields {name, label} *)
name = #[[1]];
name,
data = Drop[#, 1];
plantdatali[name] = "";
If[data === {},
Print["Warning: empty data for entry: ", name];
label = " EMPTY",
label = "- NO DATA";
id = data[[1]] // ToLowerCase;
(* Print["ID: ", id]; *)
(* all-digit or very long id: treated as a catalogueoflife.org record *)
If[StringMatchQ[id, RegularExpression["[0-9]+"]] || StringLength[id] > 30,
(* this first url is immediately overwritten by the 2017 checklist url below *)
url = "http://www.catalogueoflife.org/col/webservice?id=" <> id <> "&response=full";
(* Temporary! for newer database. To search, use: http://www.catalogueoflife.org/annual-checklist/2017/search/all *)
url = "http://www.catalogueoflife.org/annual-checklist/2017/webservice?id=" <> id <> "&response=full";
url1 = "http://www.catalogueoflife.org/col/details/species/id/" <> id <> "";
plantdataurl[name] = url1;
plantdatali[name] = "
url1 <> "\">" <> name <> " from www.catalogueoflife.org
";
label = "+ catalogueoflife.org: " <> id;
filedata = ToFileName[dirdata, id <> ".xml"];
If[FileType[filedata] =!= File,
Print["Getting ", url];
resp = URLSave[url, filedata, {"Headers", "StatusCode"}];
If[resp[[2]] != 200,
Print["Warning: URL = ", url, ": bad response: ", resp];
If[FileType[filedata] === File,
Print["File ", filedata, " will be deleted!"];
Print[Import[filedata, "Text"]];
DeleteFile[filedata];
];
];
If[FileType[filedata] === File,
If[FileByteCount[filedata] < 1000,
Print["Warning: URL = ", url, ": too short output: ", Import[filedata, "Text"]];
Print["File ", filedata, " will be deleted!"];
DeleteFile[filedata];
];
];
];
If[FileType[filedata] === File, readdata1[filedata], Print["Warning: file ", filedata, " not found."];]
];
If[StringMatchQ[id, RegularExpression["[a-z]+[-][0-9]+"]] && StringLength[id] < 30,
url = "http://www.theplantlist.org/tpl1.1/record/" <> id;
url1 = url;
plantdataurl[name] = url1;
plantdatali[name] = " url1 <> "\">" <> name <> " from www.theplantlist.org
";
label = "* theplantlist.org: " <> id;
filedata = ToFileName[dirdata, id <> ".htm"];
If[FileType[filedata] =!= File,
Print["Getting ", url];
resp = URLSave[url, filedata, {"Headers", "StatusCode"}];
If[resp[[2]] != 200,
Print["Warning: URL = ", url, ": bad response: ", resp];
If[FileType[filedata] === File,
Print["File ", filedata, " will be deleted!"];
Print[Import[filedata, "Text"]];
DeleteFile[filedata];
];
];
];
If[FileType[filedata] === File, readdata2[filedata], Print["Warning: file ", filedata, " not found."];]
];
];
label} & /@ data;
text = {
"",
"Lookout",
Table[
url =
"http://www.catalogueoflife.org/col/search/all/key/" <>
StringReplace[name[[1]], " " -> "+"];
" url <> "\" TARGET=\"_blank\">" <> name[[1]] <>
"" <> name[[2]] <> "
", {name,
names}],
""};
Export[filelook, Flatten[text], "Lines"];
Print["Lookout web page " <> ToFileName[dirwork, filelook] <> " was created."];
(* Not found in databases *)
plantgenus[name_String] := StringReplace[name, RegularExpression["(.*?) .*"] -> "$1"];
plantcnames[name_String] = {};
);
(* readdata1[file]: parses a cached catalogueoflife.org XML record and stores
   what it finds under the current global `name` (set by the caller before the
   call).

   Parameter:
     file - path of the XML file to import.

   Definitions written for the current name:
     plantname      - content of the first "name_html" element, trimmed;
     plantsynonyms  - shortened synonym names without the accepted name;
     plantclass     - "Gymnospermous flowering plants" for class Gnetopsida,
                      otherwise "Flowering plants (angiosperms)";
     plantfamily    - family name after applying unitefamilies;
     plantgenus     - genus name;
     plantcnames    - names taken from the "common_name" elements.
   The helper globals plantsynonyms0/1/2, planttaxons and plantclass0 are
   overwritten as well. Returns Null.

   An error is printed when the file does not contain exactly one "result" or
   exactly one "classification" element. When none is present at all, the
   dependent definitions are skipped instead of indexing into an empty list. *)
readdata1[file_String] := Module[{dataxml, results, reps, msyn, cls, taxons, name0, cnames},
  dataxml = Import[file, {"XML"}];
  results = Cases[dataxml, XMLElement["result", _, a_] :> a, Infinity];
  If[Length[results] != 1, Print["error reading file ", file, " : Length[results] != 1"]];
  If[results =!= {},
    dataxml = results[[1]];
    (* Rules that flatten element content into one string: unwrap singleton
       lists, join string lists with spaces, and turn an inline element back
       into markup. The closing tag needs the "</" prefix to be well formed. *)
    reps = {{a_} :> a, {s___String} :> StringJoin[Riffle[{s}, " "]], XMLElement[x_String, y_, z_String] :> "<" <> x <> ">" <> z <> "</" <> x <> ">"};
    plantname[name] = StringTrim[Cases[dataxml, XMLElement["name_html", _, a_] :> a, Infinity, 1] //. reps];
    name0 = Cases[dataxml, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps;
    (* Keep the first two words (with an optional hybrid sign in between) and
       drop whatever follows them. *)
    name0 = StringReplace[name0, RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1"];
    noPrint["Name: ", plantname // FullForm];
    plantsynonyms0 = Cases[dataxml, XMLElement["synonym", _, a_] :> a, Infinity];
    msyn = Length[plantsynonyms0];
    plantsynonyms1 = (Cases[#, XMLElement["name_html", _, a_] :> a, Infinity, 1] //. reps) & /@ plantsynonyms0;
    plantsynonyms2 = (Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps) & /@ plantsynonyms0;
    noPrint["Synonyms (", msyn, "): ", {plantsynonyms1, plantsynonyms2} // Transpose // TableForm];
    (* Simplification *)
    plantsynonyms2 = stringtrim /@ plantsynonyms2;
    plantsynonyms[name] = Complement[Union[StringReplace[#, RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1"] & /@ plantsynonyms2], {name0}];
    cls = Cases[dataxml, XMLElement["classification", _, a_] :> a, Infinity];
    If[Length[cls] != 1, Print["error reading classification in file ", file, " : Length[cls] != 1"]];
    (* Without a classification element there is nothing to read the ranks from. *)
    If[cls =!= {},
      cls = cls[[1]];
      taxons = Cases[cls, XMLElement["taxon", _, a_] :> a, Infinity];
      (* One {name, rank} pair per taxon of the classification. *)
      planttaxons = {Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps,
        Cases[#, XMLElement["rank", _, a_] :> a, Infinity, 1] //. reps} & /@ taxons;
      plantclass0 = Select[planttaxons, #[[2]] === "Class" &][[1, 1]];
      plantclass[name] = If[plantclass0 === "Gnetopsida", "Gymnospermous flowering plants", "Flowering plants (angiosperms)"];
      (*plantclass[name] = Switch[plantclass0,
      "Gnetopsida", "Gymnospermous flowering plants",
      "Liliopsida", "Monocots",
      _, "Flowering plants (angiosperms)"];*)
      plantfamily[name] = Select[planttaxons, #[[2]] === "Family" &][[1, 1]] /. unitefamilies;
      plantgenus[name] = Select[planttaxons, #[[2]] === "Genus" &][[1, 1]];
      noPrint[{{"Class", plantclass}, {"Family", plantfamily}, {"Genus", plantgenus}} // TableForm];
    ];
    cnames = Cases[dataxml, XMLElement["common_name", _, a_] :> a, Infinity];
    plantcnames[name] = (Cases[#, XMLElement["name", _, a_] :> a, Infinity, 1] //. reps) & /@ cnames;
  ];
];
(* readdata2[file]: parses a cached theplantlist.org HTML record and stores
   what it finds under the current global `name` (set by the caller before the
   call).

   Parameter:
     file - path of the HTML file to import.

   Definitions written for the current name:
     plantname      - trimmed entry [[2, 1]] of the page's "Data" import;
     plantsynonyms  - shortened names of all rows marked "Synonym", without
                      the accepted name;
     plantclass     - "Flowering plants (angiosperms)" when the page header
                      says "Angiosperms", otherwise
                      "Gymnospermous flowering plants";
     plantfamily    - family name after applying unitefamilies;
     plantgenus     - genus name.
   The helper globals plantname0 and plantsynonyms0 are overwritten as well.
   Returns Null.

   Both imports read the file passed as argument. An error is printed when the
   header pattern does not match exactly once. When it does not match at all,
   class, family and genus are left untouched instead of being destructured
   from an empty list. *)
readdata2[file_String] := Module[{data, name0, class, family, genus},
  data = Import[file, {"HTML", "Data"}];
  plantname0 = data[[2, 1]] // StringTrim;
  plantname[name] = plantname0;
  (* Keep the first two words (with an optional hybrid sign in between) and
     drop whatever follows them. *)
  name0 = StringReplace[plantname0, RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1"];
  plantsynonyms0 = Cases[data, {a_, "Synonym", _, _} :> a, Infinity];
  plantsynonyms[name] = Complement[Union[StringReplace[#, RegularExpression["(.*?( \[Times])? .*?) .*"] -> "$1"] & /@ plantsynonyms0], {name0}];
  (* Second pass over the same file as plain text: the three lines after the
     "The Plant List" heading are taken as class, family and genus. *)
  data = Import[file, {"HTML", "Plaintext"}];
  data = StringCases[data, RegularExpression["The Plant List[ ]*\n(.*?)\n(.*?)\n(.*)"] -> {"$1", "$2", "$3"}];
  If[Length[data] != 1, Print["Error reading file ", file, " : Length[data] != 1"]];
  If[data =!= {},
    {class, family, genus} = stringtrim /@ data[[1]];
    plantclass[name] = If[class === "Angiosperms", "Flowering plants (angiosperms)", "Gymnospermous flowering plants"];
    plantfamily[name] = family /. unitefamilies;
    plantgenus[name] = genus;
  ];
];