++ Currently, the examples are being reworked after the latest update because GBIF behaves differently now. Find out more. ++
This workflow uses the VAT to compare the occurrence of Canis lupus and Felis silvestris as a function of land use classification from the Ökosystematlas.
The purpose of this notebook is also to demonstrate the capabilities of Geo Engine. Therefore, some useful techniques are shown along the way.
When building your own nested workflow, it is recommended to build it in several steps as shown in this notebook.
Documentation about the operators and how to use them in Python can be found here: https://docs.geoengine.io/operators/intro.html
# Import packages: standard library first, then third-party packages.
from datetime import datetime

import altair as alt
import geoengine as ge
import geoengine_openapi_client
from geoengine.types import RasterBandDescriptor

# Use Altair's 'default' renderer for the charts shown in this notebook.
alt.renderers.enable('default')
RendererRegistry.enable('default')
# Initialize Geo Engine in VAT by pointing the client at the VAT API.
vat_api_url = "https://vat.gfbio.org/api"
ge.initialize(vat_api_url)
# Get the GBIF DataProvider id (useful for translating the DataProvider name
# to its id). The id is interpolated into the data ids of the species
# workflows further down.
root_collection = ge.layer_collection()
gbif_matches = [
    str(item.provider_id)
    for item in root_collection.items
    if item.name == 'GBIF'
]

# Fail early: an empty id would silently yield invalid data ids such as
# "_::`species/Canis lupus`" in every later workflow.
if not gbif_matches:
    raise LookupError("No entry named 'GBIF' found in the root layer collection")

# If several entries carry the name, the last one is used.
gbif_prov_id = gbif_matches[-1]
gbif_prov_id
'1c01dbb9-e3ab-f9a2-06f5-228ba4b6bf7a'
This chapter is not required and only shows that country borders are available.
# Create a workflow that requests the German border.
germany_border_source = {
    "type": "OgrSource",
    "params": {"data": "germany"},
}
workflow_germany = ge.register_workflow({
    "type": "Vector",
    "operator": germany_border_source,
})

# Display the registered workflow (its id is shown as the cell output).
workflow_germany
2429a993-385f-546f-b4f7-97b3ba4a5adb
# Set the time range; both timestamps share the same ISO-like format.
timestamp_format = "%Y-%m-%dT%H:%M:%S.%f%z"
start_time = datetime.strptime('2000-04-01T12:00:00.000Z', timestamp_format)
end_time = datetime.strptime('2030-04-01T12:00:00.000Z', timestamp_format)

# Describe the query: bounding box, time interval, resolution and CRS.
germany_border_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the data from Geo Engine into a geopandas dataframe.
data = workflow_germany.get_dataframe(germany_border_query)

# Plot the data.
data.plot()
<Axes: >
This chapter is not needed and only shows that raster data is also available.
# Create a workflow that requests the oekosystematlas raster data.
oekosystematlas_source = {
    "type": "GdalSource",
    "params": {"data": "oekosystematlas"},
}
workflow_oekosystematlas = ge.register_workflow({
    "type": "Raster",
    "operator": oekosystematlas_source,
})

# Display the registered workflow.
workflow_oekosystematlas
8a859eeb-0778-5190-a9d1-b1f787e4176d
# Describe the query: bounding box, time interval, resolution and CRS.
oekosystematlas_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the data from Geo Engine into an xarray DataArray.
data = workflow_oekosystematlas.get_xarray(oekosystematlas_query)

# Plot the data, capping the colour scale at 75.
data.plot(vmax=75)
/home/duempelmann/geoengine_env/lib/python3.10/site-packages/owslib/coverage/wcs110.py:85: FutureWarning: The behavior of this method will change in future versions. Use specific 'len(elem)' or 'elem is not None' test instead. elem = self._capabilities.find(self.ns.OWS('ServiceProvider')) or self._capabilities.find(self.ns.OWS('ServiceProvider')) # noqa <matplotlib.collections.QuadMesh at 0x7f67d1c4ada0>
Strictly speaking, none of the following intermediate steps is necessary, because the entire workflow is expressed as a single nested request at the end. However, the steps are intended to show the capabilities of Geo Engine.
# Create a workflow that requests Canis lupus occurrences.
# The data id combines the GBIF provider id with the species name.
canis_lupus_source = {
    "type": "OgrSource",
    "params": {"data": f"_:{gbif_prov_id}:`species/Canis lupus`"},
}
workflow_canis_lupus = ge.register_workflow({
    "type": "Vector",
    "operator": canis_lupus_source,
})

# Show the result descriptor (data type, spatial reference, columns).
workflow_canis_lupus.get_result_descriptor()
Data type: MultiPoint Spatial Reference: EPSG:4326 Columns: gbifid: Column Type: int Measurement: unitless scientificname: Column Type: text Measurement: unitless basisofrecord: Column Type: text Measurement: unitless
# Describe the query: bounding box, time interval, resolution and CRS.
canis_lupus_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the data from Geo Engine into a geopandas dataframe.
data = workflow_canis_lupus.get_dataframe(canis_lupus_query)

# Plot the data.
data.plot()
# Create a workflow that requests Canis lupus occurrences filtered by the
# German border. The two inputs of the filter are defined first.

# Points: Canis lupus occurrences (with an empty attribute projection).
canis_lupus_points = {
    "type": "OgrSource",
    "params": {
        "data": f"_:{gbif_prov_id}:`species/Canis lupus`",
        "attributeProjection": [],
    },
}

# Polygons: Germany.
germany_polygons = {
    "type": "OgrSource",
    "params": {"data": "germany"},
}

workflow_canis_lupus_cut = ge.register_workflow({
    "type": "Vector",
    "operator": {
        "type": "PointInPolygonFilter",
        "params": {},
        "sources": {
            "points": canis_lupus_points,
            "polygons": germany_polygons,
        },
    },
})

# Display the registered workflow.
workflow_canis_lupus_cut
f30ac841-81b0-5301-bac6-840dd914c1ba
# Describe the query: bounding box, time interval, resolution and CRS.
canis_lupus_cut_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the filtered occurrences from Geo Engine into a geopandas dataframe.
data_canis_lupus = workflow_canis_lupus_cut.get_dataframe(canis_lupus_cut_query)

# Plot the data.
data_canis_lupus.plot()
# Create a workflow that requests Canis lupus occurrences filtered by the
# German border and linked to the Ökosystematlas data.
# The nested operators are assembled bottom-up: sources -> filter -> join.

# Vector input: Canis lupus occurrences cut to Germany.
canis_lupus_in_germany = {
    "type": "PointInPolygonFilter",
    "params": {},
    "sources": {
        "points": {
            "type": "OgrSource",
            "params": {
                "data": f"_:{gbif_prov_id}:`species/Canis lupus`",
                "attributeProjection": [],
            },
        },
        "polygons": {
            "type": "OgrSource",
            "params": {"data": "germany"},
        },
    },
}

# Raster input: Ökosystematlas.
oekosystematlas_raster = {
    "type": "GdalSource",
    "params": {"data": "oekosystematlas"},
}

# NOTE(review): "mean" is used as feature aggregation although the raster is
# a land use classification - confirm that averaging class codes is intended.
canis_lupus_join_params = {
    "names": {"type": "names", "values": ["Ökosystematlas"]},
    "temporalAggregation": "none",
    "featureAggregation": "mean",
}

workflow_canis_lupus_cut_join = ge.register_workflow({
    "type": "Vector",
    "operator": {
        "type": "RasterVectorJoin",
        "params": canis_lupus_join_params,
        "sources": {
            "vector": canis_lupus_in_germany,
            "rasters": [oekosystematlas_raster],
        },
    },
})

# Display the registered workflow.
workflow_canis_lupus_cut_join
2c8ebbbc-b848-58e6-8f5c-f51976db3c8f
# Describe the query: bounding box, time interval, resolution and CRS.
canis_lupus_join_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the joined data from Geo Engine into a geopandas dataframe,
# with classification resolving switched on.
data = workflow_canis_lupus_cut_join.get_dataframe(
    canis_lupus_join_query,
    resolve_classifications=True,
)

# Show the geopandas dataframe.
data
1341 rows × 7 columns
It can be seen that the Ökosystematlas variable is numerical, while the human-readable class names are encoded in the metadata of the files. This can be resolved by using a class histogram.
# Create a workflow that plots Canis lupus occurrences filtered by the German
# border and merged with Ökosystematlas data as a class histogram.
# The nested operators are assembled bottom-up:
# sources -> filter -> join -> histogram.

# Canis lupus occurrences cut to Germany.
canis_lupus_in_germany = {
    "type": "PointInPolygonFilter",
    "params": {},
    "sources": {
        "points": {
            "type": "OgrSource",
            "params": {
                "data": f"_:{gbif_prov_id}:`species/Canis lupus`",
                "attributeProjection": [],
            },
        },
        "polygons": {
            "type": "OgrSource",
            "params": {"data": "germany"},
        },
    },
}

# Canis lupus cut, joined with the Ökosystematlas raster.
# NOTE(review): "mean" is used as feature aggregation although the raster is
# a land use classification - confirm that averaging class codes is intended.
canis_lupus_landuse_join = {
    "type": "RasterVectorJoin",
    "params": {
        "names": {"type": "names", "values": ["Ökosystematlas"]},
        "temporalAggregation": "none",
        "featureAggregation": "mean",
    },
    "sources": {
        "vector": canis_lupus_in_germany,
        "rasters": [{
            "type": "GdalSource",
            "params": {"data": "oekosystematlas"},
        }],
    },
}

workflow_canis_lupus_full = ge.register_workflow({
    "type": "Plot",
    "operator": {
        "type": "ClassHistogram",
        "params": {"columnName": "Ökosystematlas"},
        "sources": {"source": canis_lupus_landuse_join},
    },
})

# Display the registered workflow.
workflow_canis_lupus_full
b182c10b-59ce-5d5b-946f-fccc3ae04c88
# Describe the query: bounding box, time interval, resolution and CRS.
canis_lupus_plot_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the plot from Geo Engine.
plot_canis_lupus = workflow_canis_lupus_full.plot_chart(canis_lupus_plot_query)

# Show the plot: build an Altair chart from the returned chart spec.
alt.Chart.from_dict(plot_canis_lupus.spec)
# Create a workflow that requests Felis silvestris occurrences.
# The data id combines the GBIF provider id with the species name.
felis_silvestris_source = {
    "type": "OgrSource",
    "params": {"data": f"_:{gbif_prov_id}:`species/Felis silvestris`"},
}
workflow_felis_silvestris = ge.register_workflow({
    "type": "Vector",
    "operator": felis_silvestris_source,
})

# Display the registered workflow.
workflow_felis_silvestris
f8d5abd5-7d5f-567e-97a2-7830052d6cbf
# Describe the query: bounding box, time interval, resolution and CRS.
felis_silvestris_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the data from Geo Engine into a geopandas dataframe.
data = workflow_felis_silvestris.get_dataframe(felis_silvestris_query)

# Plot the data.
data.plot()
# Create a workflow that requests Felis silvestris occurrences filtered by the
# German border. The two inputs of the filter are defined first.

# Points: Felis silvestris occurrences (with an empty attribute projection).
felis_silvestris_points = {
    "type": "OgrSource",
    "params": {
        "data": f"_:{gbif_prov_id}:`species/Felis silvestris`",
        "attributeProjection": [],
    },
}

# Polygons: Germany.
germany_polygons = {
    "type": "OgrSource",
    "params": {"data": "germany"},
}

workflow_felis_silvestris_cut = ge.register_workflow({
    "type": "Vector",
    "operator": {
        "type": "PointInPolygonFilter",
        "params": {},
        "sources": {
            "points": felis_silvestris_points,
            "polygons": germany_polygons,
        },
    },
})

# Display the registered workflow.
workflow_felis_silvestris_cut
518c27b3-0ce7-56ac-b826-5a72be463a73
# Describe the query: bounding box, time interval, resolution and CRS.
felis_silvestris_cut_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the filtered occurrences from Geo Engine into a geopandas dataframe.
data_felis_silvestris = workflow_felis_silvestris_cut.get_dataframe(
    felis_silvestris_cut_query
)

# Plot the data.
data_felis_silvestris.plot()
# Create a workflow that requests Felis silvestris occurrences filtered by the
# German border and linked to the Ökosystematlas data.
# The nested operators are assembled bottom-up: sources -> filter -> join.

# Vector input: Felis silvestris occurrences cut to Germany.
felis_silvestris_in_germany = {
    "type": "PointInPolygonFilter",
    "params": {},
    "sources": {
        "points": {
            "type": "OgrSource",
            "params": {
                "data": f"_:{gbif_prov_id}:`species/Felis silvestris`",
                "attributeProjection": [],
            },
        },
        "polygons": {
            "type": "OgrSource",
            "params": {"data": "germany"},
        },
    },
}

# Raster input: Ökosystematlas.
oekosystematlas_raster = {
    "type": "GdalSource",
    "params": {"data": "oekosystematlas"},
}

# NOTE(review): "mean" is used as feature aggregation although the raster is
# a land use classification - confirm that averaging class codes is intended.
felis_silvestris_join_params = {
    "names": {"type": "names", "values": ["Ökosystematlas"]},
    "temporalAggregation": "none",
    "featureAggregation": "mean",
}

workflow_felis_silvestris_cut_join = ge.register_workflow({
    "type": "Vector",
    "operator": {
        "type": "RasterVectorJoin",
        "params": felis_silvestris_join_params,
        "sources": {
            "vector": felis_silvestris_in_germany,
            "rasters": [oekosystematlas_raster],
        },
    },
})

# Display the registered workflow.
workflow_felis_silvestris_cut_join
355b4e59-65cc-5cfe-a0b4-636f4d41beab
# Describe the query: bounding box, time interval, resolution and CRS.
felis_silvestris_join_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the joined data from Geo Engine into a geopandas dataframe,
# with classification resolving switched on.
data = workflow_felis_silvestris_cut_join.get_dataframe(
    felis_silvestris_join_query,
    resolve_classifications=True,
)

# Show the geopandas dataframe.
data
1121 rows × 7 columns
# Create a workflow that plots Felis silvestris occurrences filtered by the
# German border and merged with the Ökosystematlas data as a class histogram.
# The nested operators are assembled bottom-up:
# sources -> filter -> join -> histogram.

# Felis silvestris occurrences cut to Germany.
felis_silvestris_in_germany = {
    "type": "PointInPolygonFilter",
    "params": {},
    "sources": {
        "points": {
            "type": "OgrSource",
            "params": {
                "data": f"_:{gbif_prov_id}:`species/Felis silvestris`",
                "attributeProjection": [],
            },
        },
        "polygons": {
            "type": "OgrSource",
            "params": {"data": "germany"},
        },
    },
}

# Felis silvestris cut, joined with the Ökosystematlas raster.
# NOTE(review): "mean" is used as feature aggregation although the raster is
# a land use classification - confirm that averaging class codes is intended.
felis_silvestris_landuse_join = {
    "type": "RasterVectorJoin",
    "params": {
        "names": {"type": "names", "values": ["Ökosystematlas"]},
        "temporalAggregation": "none",
        "featureAggregation": "mean",
    },
    "sources": {
        "vector": felis_silvestris_in_germany,
        "rasters": [{
            "type": "GdalSource",
            "params": {"data": "oekosystematlas"},
        }],
    },
}

workflow_felis_silvestris_full = ge.register_workflow({
    "type": "Plot",
    "operator": {
        "type": "ClassHistogram",
        "params": {"columnName": "Ökosystematlas"},
        "sources": {"source": felis_silvestris_landuse_join},
    },
})

# Display the registered workflow.
workflow_felis_silvestris_full
db03640c-cf0e-5fe0-978c-f45a55eb5da3
# Describe the query: bounding box, time interval, resolution and CRS.
felis_silvestris_plot_query = ge.QueryRectangle(
    ge.BoundingBox2D(5.852490, 47.271121, 15.022059, 55.065334),
    ge.TimeInterval(start_time, end_time),
    resolution=ge.SpatialResolution(0.1, 0.1),
    srs="EPSG:4326",
)

# Request the plot from Geo Engine.
plot_felis_silvestris = workflow_felis_silvestris_full.plot_chart(
    felis_silvestris_plot_query
)

# Show the plot: build an Altair chart from the returned chart spec.
alt.Chart.from_dict(plot_felis_silvestris.spec)
# Show the Canis lupus plot again, for side-by-side comparison.
canis_lupus_chart_spec = plot_canis_lupus.spec
alt.Chart.from_dict(canis_lupus_chart_spec)
# Show the Felis silvestris plot again, for side-by-side comparison.
felis_silvestris_chart_spec = plot_felis_silvestris.spec
alt.Chart.from_dict(felis_silvestris_chart_spec)
In this chapter, some further useful ways of combining Geo Engine and Python are shown.
# Comparison plots
import pandas as pd

# Convert the chart data embedded in both plot specs to pandas DataFrames
# and tag every row with the species it belongs to.
df1 = pd.DataFrame(plot_canis_lupus.spec['data']['values'])
df2 = pd.DataFrame(plot_felis_silvestris.spec['data']['values'])
df1['dataset'] = 'Canis lupus'
df2['dataset'] = 'Felis silvestris'
combined_df = pd.concat([df1, df2])

# Grouped bar chart: one bar per species within each land cover class.
# The xOffset channel uses its own channel class (alt.XOffset) rather than
# alt.Color, which only belongs on the colour channel.
chart = alt.Chart(combined_df).mark_bar().encode(
    x=alt.X('Land Cover:N', title='Land Cover'),
    y=alt.Y('Frequency:Q', title='Frequency'),
    color=alt.Color('dataset:N', title='Dataset'),
    xOffset=alt.XOffset('dataset:N'),
).properties(width=600)

# Display the grouped barplot.
chart
# Plotting of multiple species
import geopandas as gpd
import pandas as pd  # imported here so the cell does not rely on an earlier cell

# Tag each dataframe with its species name. `assign` returns a new dataframe,
# so data_canis_lupus and data_felis_silvestris are not modified in place.
gdf1 = data_canis_lupus.assign(dataset='Canis lupus')
gdf2 = data_felis_silvestris.assign(dataset='Felis silvestris')

# Stack both species and colour the points by species.
combined_gdf = pd.concat([gdf1, gdf2])
combined_gdf.plot(column='dataset', cmap='rainbow', markersize=5, legend=True)