mcmaster_scraper.sync_api
from pandas import DataFrame

from . import async_api
from ._utils.event_loop_wrapper import run_in_loop_sync


def get_products_from_url(url: str, refresh: bool = False) -> DataFrame:
    """Scrape the product tables found at a McMaster-Carr URL.

    Thin synchronous wrapper: the result of calling
    ``async_api.get_products_from_url`` is passed to ``run_in_loop_sync``
    and whatever that returns is handed back to the caller.

    When the page holds several product tables they are merged into one,
    and an additional "Product Type" column is added.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
        It has to be a valid McMaster-Carr URL, and the product tables
        must be visible on the webpage.
    refresh : bool, default False
        Whether to refresh the cached data.

    Returns
    -------
    DataFrame
        The combined product tables as a pandas DataFrame.

    Raises
    ------
    ValueError
        If the URL is not a valid McMaster-Carr URL.
    UnsupportedOperation
        If product table extraction is unsupported for the URL.
    """
    pending = async_api.get_products_from_url(url, refresh)
    return run_in_loop_sync(pending)


def get_products_from_urls(urls: list[str], refresh: bool = False) -> list[DataFrame]:
    """Scrape the product tables for a list of McMaster-Carr URLs.

    Thin synchronous wrapper: the result of calling
    ``async_api.get_products_from_urls`` is passed to ``run_in_loop_sync``
    and whatever that returns is handed back to the caller.

    Parameters
    ----------
    urls : list[str]
        The McMaster-Carr URLs to scrape.
    refresh : bool, default False
        Forwarded unchanged to ``async_api.get_products_from_urls``.

    Returns
    -------
    list[DataFrame]
        The pandas DataFrames produced for the given URLs.

    See Also
    --------
    get_products_from_url
    """
    pending = async_api.get_products_from_urls(urls, refresh)
    return run_in_loop_sync(pending)
def
get_products_from_url(url: str, refresh: bool = False) -> pandas.DataFrame:
def get_products_from_url(url: str, refresh: bool = False) -> DataFrame:
    """Scrape the product tables found at a McMaster-Carr URL.

    Thin synchronous wrapper: the result of calling
    ``async_api.get_products_from_url`` is passed to ``run_in_loop_sync``
    and whatever that returns is handed back to the caller.

    When the page holds several product tables they are merged into one,
    and an additional "Product Type" column is added.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
        It has to be a valid McMaster-Carr URL, and the product tables
        must be visible on the webpage.
    refresh : bool, default False
        Whether to refresh the cached data.

    Returns
    -------
    DataFrame
        The combined product tables as a pandas DataFrame.

    Raises
    ------
    ValueError
        If the URL is not a valid McMaster-Carr URL.
    UnsupportedOperation
        If product table extraction is unsupported for the URL.
    """
    pending = async_api.get_products_from_url(url, refresh)
    return run_in_loop_sync(pending)
Gets product tables from a McMaster-Carr URL.
If there are multiple product tables, they will be merged, and an additional "Product Type" column will be added.
Parameters
- url (str): The URL to scrape. Must be a valid McMaster-Carr URL. The product tables must be visible on the webpage.
- refresh (bool, default False): Whether to refresh the cached data.
Returns
- DataFrame: A pandas DataFrame containing the combined product tables.
Raises
- ValueError: If the URL is not a valid McMaster-Carr URL.
- UnsupportedOperation: If product table extraction is unsupported for the URL.
def
get_products_from_urls(urls: list[str], refresh: bool = False) -> list[pandas.DataFrame]: