diff --git a/wrangler_functions.py b/wrangler_functions.py index 9d2363334ec2f9b7dd4e0935f7a071e8975bf280..132e6c9fc8f472512efec836f0512c9e8a66bf4f 100644 --- a/wrangler_functions.py +++ b/wrangler_functions.py @@ -76,7 +76,7 @@ output_schema = {"GBIF_download_doi": "str", # Core functions >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> -def build_output_database(output_database): +def build_output_database(output_database: str) -> None: """ Create a database for storing occurrence and taxon concept data. The column names that are "camel case" are Darwin Core attributes, whereas @@ -86,7 +86,7 @@ def build_output_database(output_database): Parameters ---------- - output_database : Path for sqlite database to create; string. + output_database : Path for sqlite database to create Returns ------- @@ -108,8 +108,8 @@ def build_output_database(output_database): return -def get_EBD_records(taxon_info, filter_set, working_directory, EBD_file, - query_name, R_home): +def get_EBD_records(taxon_info: dict, filter_set: dict, working_directory: str, EBD_file: str, + query_name: str, R_home: str) -> pd.DataFrame: ''' Gets eBird records from a copy of the Ebird Basic Dataset that you acquired. Primarily runs R code that uses the Auk package to query the @@ -119,12 +119,12 @@ def get_EBD_records(taxon_info, filter_set, working_directory, EBD_file, Parameters ---------- - taxon_info : your taxon concept; dictionary - filter_set : name of the filter set to apply; dictionary - working_directory : path to use for table of filtered query results; string - EBD_file : path to your downloaded copy of the Ebird Basic Dataset; string - query_name : the name you chose for your query; string - R_home : path to R install to use, get from wranglerconfig; string + taxon_info : your taxon concept + filter_set : name of the filter set to apply + working_directory : path to use for table of filtered query results + EBD_file : path to your downloaded copy of the Ebird Basic Dataset + query_name : the name you chose for your query + R_home : path to R install to use, get from wranglerconfig Returns ------- @@ -449,8 +449,8 @@ def get_EBD_records(taxon_info, filter_set, working_directory, EBD_file, return records2 -def get_GBIF_records(taxon_info, filter_set, query_name, working_directory, - username, password, email): +def get_GBIF_records(taxon_info: dict, filter_set: dict, query_name: str, working_directory: str, + username: str, password: str, email: str) -> pd.DataFrame: ''' Retrieves species occurrence records from GBIF. Filters occurrence records, buffers the xy points, and saves them in a database. Finally, @@ -461,13 +461,13 @@ def get_GBIF_records(taxon_info, filter_set, query_name, working_directory, Parameters ---------- - taxon_info : your taxon concept; dictionary - filter_set : name of the filter set to apply; dictionary - query_name : the name you chose for your query; string - working_directory : path to use for table of filtered query results; string - username : your GBIF username; string - password : your GBIF password; string - email : the email account associated with your GBIF account; string + taxon_info : your taxon concept + filter_set : name of the filter set to apply + query_name : the name you chose for your query + working_directory : path to use for table of filtered query results + username : your GBIF username + password : your GBIF password + email : the email account associated with your GBIF account Returns ------- @@ -781,8 +781,8 @@ def get_GBIF_records(taxon_info, filter_set, query_name, working_directory, return records2 -def process_records(ebird_data, gbif_data, filter_set, taxon_info, - working_directory, query_name): +def process_records(ebird_data: pd.DataFrame, gbif_data: pd.DataFrame, filter_set: str, taxon_info: dict, + working_directory: dict, query_name: str) -> pd.DataFrame: ''' Summarizes the values in the data frames, populates some fields, apply filters, summarize what values persisted after filtering. Insert @@ -1117,7 +1117,7 @@ def process_records(ebird_data, gbif_data, filter_set, taxon_info, return None -def nominal_precisions(longitude, latitude, produce): +def nominal_precisions(longitude: str, latitude: str, produce: str) -> float: ''' Calculates the nominal precisions based on WGS84 coordinates. Method is based on information from wikipedia page on latitude and posts at @@ -1158,7 +1158,7 @@ def nominal_precisions(longitude, latitude, produce): return y -def drop_duplicates_latlongdate(df): +def drop_duplicates_latlongdate(df: pd.DataFrame) -> pd.DataFrame: ''' Function to find and remove duplicate occurrence records within the wildlife wrangler workflow. When duplicates exist, the record with the @@ -1309,7 +1309,7 @@ def drop_duplicates_latlongdate(df): return df2 -def verify_results(database): +def verify_results(database: str) -> None: ''' Compares the occurrence record attributes to the filters that were supposed to be applied. @@ -1317,7 +1317,7 @@ def verify_results(database): Parameters ---------- - database : path to a wrangler output database; string. + database : path to a wrangler output database Like "Z:/Occurrence_Records/test1.sqlite" RESULTS @@ -1474,14 +1474,14 @@ def verify_results(database): # Helper functions >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> -def get_GBIF_code(name, rank='species'): +def get_GBIF_code(name: str, rank: str ='species') -> str: """ Returns the GBIF species code for a scientific name. Parameters ---------- - name : Scientific name; string - rank : level of taxonomic hieriarchy to search; string + name : Scientific name + rank : level of taxonomic hieriarchy to search Returns ------- @@ -1493,7 +1493,7 @@ def get_GBIF_code(name, rank='species'): return key -def coord_rounded(x: str, digits: int): +def coord_rounded(x: str, digits: int) -> str: ''' Rounds a decimal degrees coordinate value to a specified precision (number of digits). Returns pd.NA when a bad value is encountered. Works for @@ -1520,15 +1520,13 @@ def coord_rounded(x: str, digits: int): return pd.NA -def get_record_details(key): +def get_record_details(key: int) -> dict: """ Get the details of a records from GBIF Parameters ---------- key : a gbif taxon code - name : Scientific name; string - rank : level of taxonomic hieriarchy to search; string Returns ------- @@ -1578,7 +1576,7 @@ def nominal_x_precision(dataframe: pd.DataFrame, lat_column: str, return df -def ccw_wkt_from_shapefile(shapefile, out_txt): +def ccw_wkt_from_shapefile(shapefile: str, out_txt: str) -> None: """ Creates wkt with coordinates oriented counter clockwise for a given shapefile. Shapefiles are oriented clockwise, which is incompatible with @@ -1587,8 +1585,8 @@ def ccw_wkt_from_shapefile(shapefile, out_txt): Parameters ---------- - shapefile : path to the shpefile to read; string - out_txt : path to the text file to write the wkt to; string + shapefile : path to the shpefile to read + out_txt : path to the text file to write the wkt to Returns ------- @@ -1629,14 +1627,14 @@ def ccw_wkt_from_shapefile(shapefile, out_txt): return -def spatial_output(database, make_file, mode, output_file=None, epsg=4326): +def spatial_output(database: str, make_file: bool, mode: str, output_file: str =None, epsg: int = 4326): ''' Creates a shapefile of species occurrence records from a wildlife wrangler output SQLite database. Parameters ---------- - database : the sqlite database to use; string (path) + database : the sqlite database to use make_file : whether to save a shapefile. False just returns a geodatframe. mode : three options: 1) "points" merely creates points from record coordinates. @@ -1756,7 +1754,7 @@ def spatial_output(database, make_file, mode, output_file=None, epsg=4326): return out -def CONUS_bbox(): +def CONUS_bbox() -> tuple: """ Returns the bounding box of the conterminous U.S. as a tuple. """