Loading src/common/error_codes.py +8 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines a data class 'ErrorCodes' that instantiates various error codes used throughout the application. It categorizes error codes into distinct sections for database operations, scraping processes, templating issues, and provides a default error code for general use. Each error type is associated with specific integer values, making it easier to manage and identify errors consistently across different components of the application. ''' from dataclasses import dataclass Loading src/common/exceptions.py +7 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines custom exception classes for various application components. Each exception class validates specific error codes from 'ErrorCodes'. Included are 'ScraperException', 'DatabaseException', 'TemplateException', and 'ParserError', each tailored to a specific component and type of error within the application. 
''' from common.error_codes import ErrorCodes Loading src/common/scrapers/scripter.pydeleted 100755 → 0 +0 −25 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide a script handler method which can process sequentially, parallel.""" import os from pathlib import Path from common.logz import create_logger def get_all_scripts(script_dir, logger=create_logger()): """Retrieve all scripts in the specified directory.""" r_scripts = [] py_scripts = [] sh_scripts = [] script_path = Path(script_dir) if not script_path.is_dir(): logger.error(f"{script_dir} is not a directory or does not exist.") pass logger.info(f"looking for scripts in {script_dir}") logger.debug(f"Directory contents: {os.listdir(script_dir)}") r_scripts = list(script_path.glob("*.R")) py_scripts = list(script_path.glob("*.py")) sh_scripts = list(script_path.glob("*.sh")) pl_scripts = list(script_path.glob("*.pl")) return [str(x.resolve) for x in r_scripts + py_scripts + sh_scripts + pl_scripts] src/common/scrapers/static.py +4 −3 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide static objects.""" from dataclasses import dataclass import re @dataclass class Colors: """Provide a dataclass of colors for pretty printing.""" Loading @@ -17,7 +18,7 @@ class Colors: WHITE = "\33[37m" RESET = "\33[39m" @dataclass class UrlRegex: """Define a URL Regex.""" Loading @@ -31,7 +32,7 @@ class UrlRegex: + r"([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" ) @dataclass class ImageConfig: """Static dictionaries and values for OCR of images.""" Loading src/common/scrapers/webdriver.py +1 −87 Original line number Diff line number Diff line Loading @@ -336,89 +336,3 @@ class WebDriver: raise ParserError(msg) except Exception as e: self.logger.error(f"Unknown exception while waiting for element: " f"{e}") No newline at end of file if __name__ == "__main__": import time from bs4 import BeautifulSoup as Soup from 
selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.support.ui import Select test_url = "http://www.example.com" print("Testing if requests work") with WebDriver(url=test_url, driver="curl") as d: source = d.dump_out() print(source) with WebDriver( url=test_url, driver="chromedriver", options=[ "--no-sandbox", "--disable-gpu", "--disable-logging", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--no-zygote", "headless", ], service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"], ) as d: source = d.driver.page_source print(source) print("Running example EI ID 182") url_182 = "https://ebill.kcelectric.coop/woViewer/mapviewer.html?" url_182 = url_182 + "config=Outage+Web+Map" chrome_opts = [ "--no-sandbox", "--disable-gpu", "--disable-logging", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--no-zygote", "headless", ] service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"] with WebDriver( url=url_182, driver="chromedriver", options=chrome_opts, service_args=service_args, ) as d: xpath = '//div[@id="OMS.Customers Summary"]' # d.wait_for_element(xpath, 'xpath') # arget = d.driver.find_element_by_xpath(xpath) target = d.get_xpath(xpath) ActionChains(d.driver).move_to_element(target).click(target).perform() d.wait_for_element("select", "tag") select = Select(d.driver.find_element_by_tag_name("select")) time.sleep(2) select.select_by_visible_text("County") source = d.driver.page_source soup = Soup(source, "html.parser") table = soup.findAll("table", {"class": "GNBU0IVDGE summary-table"}) rows = table[0].find_all("td") regions = [] custs_out = [] custs_served = [] for row in rows: if "summary-region-column" in str(row): regions.append( row.get_text() .replace(" County", "") .replace(" COUNTY", "") .strip() .replace("ST ", "ST. 
") ) elif "summary-number-out-column" in str(row): custs_out.append(row.get_text()) elif ( "summary-number-served-column" in str(row) and "GMFGE5DLD" not in str(row) and "%" not in row.get_text() ): custs_served.append(row.get_text()) else: pass print("Regions found: %s" % regions) print("Customers out: %s" % custs_out) print("Custs served: %s" % custs_served) Loading
src/common/error_codes.py +8 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines a data class 'ErrorCodes' that instantiates various error codes used throughout the application. It categorizes error codes into distinct sections for database operations, scraping processes, templating issues, and provides a default error code for general use. Each error type is associated with specific integer values, making it easier to manage and identify errors consistently across different components of the application. ''' from dataclasses import dataclass Loading
src/common/exceptions.py +7 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' This module defines custom exception classes for various application components. Each exception class validates specific error codes from 'ErrorCodes'. Included are 'ScraperException', 'DatabaseException', 'TemplateException', and 'ParserError', each tailored to a specific component and type of error within the application. ''' from common.error_codes import ErrorCodes Loading
def get_all_scripts(script_dir, logger=None):
    """Retrieve all R, Python, shell, and Perl scripts in a directory.

    The search is non-recursive and matches the extensions
    ``*.R``, ``*.py``, ``*.sh``, and ``*.pl``.

    Args:
        script_dir: Path (str or os.PathLike) of the directory to search.
        logger: Optional logger. Defaults to ``create_logger()``; the
            default is now created lazily inside the call instead of at
            function-definition time, so importing this module no longer
            builds a logger as a side effect.

    Returns:
        list[str]: Absolute resolved paths of the matching scripts, or an
        empty list when *script_dir* is not an existing directory.
    """
    if logger is None:
        logger = create_logger()

    script_path = Path(script_dir)
    if not script_path.is_dir():
        logger.error(f"{script_dir} is not a directory or does not exist.")
        # Original fell through and crashed in os.listdir(); return the
        # documented empty result instead.
        return []

    logger.info(f"looking for scripts in {script_dir}")
    logger.debug(f"Directory contents: {os.listdir(script_dir)}")

    scripts = []
    for pattern in ("*.R", "*.py", "*.sh", "*.pl"):
        scripts.extend(script_path.glob(pattern))

    # BUG FIX: resolve() must be *called* -- the original used
    # ``str(x.resolve)``, which stringified the bound method object
    # (e.g. "<bound method Path.resolve ...>") instead of the path.
    return [str(x.resolve()) for x in scripts]
src/common/scrapers/static.py +4 −3 Original line number Diff line number Diff line #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Provide static objects.""" from dataclasses import dataclass import re @dataclass class Colors: """Provide a dataclass of colors for pretty printing.""" Loading @@ -17,7 +18,7 @@ class Colors: WHITE = "\33[37m" RESET = "\33[39m" @dataclass class UrlRegex: """Define a URL Regex.""" Loading @@ -31,7 +32,7 @@ class UrlRegex: + r"([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" ) @dataclass class ImageConfig: """Static dictionaries and values for OCR of images.""" Loading
if __name__ == "__main__":
    # Manual smoke test / worked example for the WebDriver wrapper.
    # Requires network access, a chromedriver binary, bs4, and selenium;
    # it is intentionally not part of the automated test suite.
    import time

    from bs4 import BeautifulSoup as Soup
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support.ui import Select

    test_url = "http://www.example.com"

    # 1) Plain HTTP fetch through the "curl" driver backend.
    print("Testing if requests work")
    with WebDriver(url=test_url, driver="curl") as d:
        source = d.dump_out()
        print(source)

    # 2) Same page through headless Chrome to confirm the selenium backend.
    with WebDriver(
        url=test_url,
        driver="chromedriver",
        options=[
            "--no-sandbox",
            "--disable-gpu",
            "--disable-logging",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--no-zygote",
            "headless",
        ],
        service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"],
    ) as d:
        source = d.driver.page_source
        print(source)

    # 3) Real-world example: scrape the outage summary table from the
    # KC Electric outage map (internal example id 182).
    print("Running example EI ID 182")
    url_182 = "https://ebill.kcelectric.coop/woViewer/mapviewer.html?"
    url_182 = url_182 + "config=Outage+Web+Map"
    chrome_opts = [
        "--no-sandbox",
        "--disable-gpu",
        "--disable-logging",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--no-zygote",
        "headless",
    ]
    service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"]
    with WebDriver(
        url=url_182,
        driver="chromedriver",
        options=chrome_opts,
        service_args=service_args,
    ) as d:
        # Click the "Customers Summary" panel, then switch the summary
        # grouping to "County" via the page's <select> control.
        xpath = '//div[@id="OMS.Customers Summary"]'
        # d.wait_for_element(xpath, 'xpath')
        # target = d.driver.find_element_by_xpath(xpath)
        target = d.get_xpath(xpath)
        ActionChains(d.driver).move_to_element(target).click(target).perform()
        d.wait_for_element("select", "tag")
        # NOTE(review): find_element_by_tag_name was removed in Selenium 4;
        # this example needs Selenium 3.x (or a port to
        # driver.find_element(By.TAG_NAME, ...)) -- confirm pinned version.
        select = Select(d.driver.find_element_by_tag_name("select"))
        time.sleep(2)  # give the widget time to re-render after the click
        select.select_by_visible_text("County")
        source = d.driver.page_source
        soup = Soup(source, "html.parser")
        # GWT-generated class names ("GNBU0IVDGE", "GMFGE5DLD") are
        # build-specific and may change when the site is redeployed.
        table = soup.findAll("table", {"class": "GNBU0IVDGE summary-table"})
        rows = table[0].find_all("td")
        regions = []
        custs_out = []
        custs_served = []
        for row in rows:
            if "summary-region-column" in str(row):
                # Normalize county names: strip the "County" suffix and
                # restore the period in abbreviations like "ST. ...".
                regions.append(
                    row.get_text()
                    .replace(" County", "")
                    .replace(" COUNTY", "")
                    .strip()
                    .replace("ST ", "ST. ")
                )
            elif "summary-number-out-column" in str(row):
                custs_out.append(row.get_text())
            elif (
                "summary-number-served-column" in str(row)
                and "GMFGE5DLD" not in str(row)
                and "%" not in row.get_text()
            ):
                # Keep only raw served counts; skip styled cells and
                # percentage cells.
                custs_served.append(row.get_text())
            else:
                pass
        print("Regions found: %s" % regions)
        print("Customers out: %s" % custs_out)
        print("Custs served: %s" % custs_served)