Commit e521aeee authored by Powell, Eric's avatar Powell, Eric
Browse files

Unmerged local changes

parent 952c5f74
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+168 −0
Original line number Diff line number Diff line
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# UV
#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#uv.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
+23 −2
Original line number Diff line number Diff line
from parse_res import ResEstimateData
# import psycopg2
# from psycopg2 import sql
from .pg_tools import PGTools

class LoadRes(ResEstimateData, PGTools):
@@ -144,3 +142,26 @@ class LoadRes(ResEstimateData, PGTools):
                                           'task_id')
        return

# class Baseline(ResEstimateData):
#     def __init__(self, project_id, workorder_title, schema_name, resource_pool_name, filename, **DB_PARAMS):
#         ResEstimateData.__init__(self, filename)
#         PGTools.__init__(self, **DB_PARAMS)
#         # self.pg_con = pg_conn
#         self.resource_poolid = self._get_id(schema_name,
#                                             "resource_pools",
#                                             "resource_pool_id",
#                                             "description", resource_pool_name)
#     def get_names(self):
#         dct_names = {}
#         for name in self.get_resource_names():
#             try:
#                 dct_names[name] = self._insert_record_and_get_pk('fo_itsd_estimate',
#                                                                  'resources',
#                                                                  {'full_name': name,
#                                                                   'resource_pool_id': self.resource_poolid},
#                                                                  'resource_id')
#             except Exception as e:
#                 if e.pgcode == '23505':
#                     dct_names[name] = self._get_id('fo_itsd_estimate', 'resources', 'resource_id', 'full_name', name)
#                     pass
+3 −1
Original line number Diff line number Diff line
import csv
import psycopg2
from psycopg2 import sql

from pandas import DataFrame
from pandasql import sqldf
from libraries.pg_tools import PGTools



class ExportPowerBInputs(PGTools):
    def __init__(self, Project_title, export_path, **DB_PARAMS):
        super().__init__(**DB_PARAMS)
+207 −0
Original line number Diff line number Diff line
import pandas as pd
from pandasql import sqldf
from parse_res import ResEstimateData
# Helper to make sqldf easier to use by finding DataFrames in the local scope
# NOTE(review): inside the lambda, locals() is the *lambda's own* scope ({'q': q}),
# not the caller's, so this module-level helper cannot see any caller DataFrames.
# The report methods below shadow it with their own pysqldf defined in-method,
# where locals() does capture the DataFrames. This module-level copy appears
# unused/broken — confirm no external importers before removing.
pysqldf = lambda q: sqldf(q, locals())


class BaselineReportGenerator(ResEstimateData):
    """Generate baseline schedule reports from a parsed resource estimate.

    Unlike the database-backed loaders, this class synthesizes numeric
    surrogate IDs in memory, builds pandas DataFrames for ``activities``
    and ``task_details``, and joins them (plus caller-supplied lookup
    DataFrames) with pandasql/SQLite queries.
    """

    def __init__(self, project_id, workorder_title, resource_pool_name, filename, workorder_id):
        """
        Initializes the report generator.

        Note: Added 'workorder_id' as it's required for building the activities table.
        """
        super().__init__(filename)  # Parent parses the estimate file
        self.project_id = project_id
        self.resource_pool_name = resource_pool_name
        self.workorder_title = workorder_title
        self.workorder_id = workorder_id

        # Internal maps to store generated surrogate IDs
        self._activity_id_map = {}  # Maps 'est_act_id' to a new numeric PK
        self._resource_id_map = {}  # Maps 'resource_name' to a new numeric PK

    def _build_activities_df(self):
        """
        Builds the 'activities' DataFrame using data from the parent class.

        One row per unique est_act_id; also populates self._activity_id_map
        so _build_task_details_df can resolve activity foreign keys.
        """
        activities_list = []
        next_pk = 1

        # get_wbs_data() is inherited from ResEstimateData; 'Activities'
        # rows look like (est_act_id, description, ...).
        for activity in self.get_wbs_data()['Activities']:
            est_act_id, activity_desc = activity[0], activity[1]

            if est_act_id not in self._activity_id_map:
                self._activity_id_map[est_act_id] = next_pk
                activities_list.append({
                    'activity_id': next_pk,
                    'activity_description': activity_desc,
                    'est_act_id': est_act_id,
                    'workorder_id': self.workorder_id,
                })
                next_pk += 1

        return pd.DataFrame(activities_list)

    def _build_resource_map(self):
        """
        Builds an in-memory map of resource names to sequential numeric IDs.
        """
        next_pk = 1
        # get_resource_names() is inherited from ResEstimateData
        for name in self.get_resource_names():
            if name not in self._resource_id_map:
                self._resource_id_map[name] = next_pk
                next_pk += 1

    def _build_task_details_df(self):
        """
        Builds the 'task_details' DataFrame using data from the parent class.

        NOTE: relies on self._activity_id_map; callers should invoke
        _build_activities_df() first (both report methods do), otherwise
        activity_id falls back to None via dict.get.
        """
        # Ensure the resource map is built first
        self._build_resource_map()

        tasks_list = []

        # Get all data from inherited methods
        person_hours_tasks = self.get_person_hours_task()
        wbs_tasks = self.get_wbs_data()['Tasks']
        task_dates = self.get_task_dates()

        for task in person_hours_tasks:
            resource_name, task_data = task[0], task[1]
            task_item = task_data['Item']

            # Skip tasks with no matching WBS row or date row rather than raising
            wbs = [w for w in wbs_tasks if w[0] == task_item]
            if not wbs:
                continue

            task_date_info = [td for td in task_dates if td[0] == task_item]
            if not task_date_info:
                continue

            tasks_list.append({
                'resource_id': self._resource_id_map.get(resource_name),
                'hours': task_data['Hours'],
                'contingency_percent': task_data['Cont %'],
                'start_date': task_date_info[0][1],
                'end_date': task_date_info[0][2],
                'activity_id': self._activity_id_map.get(wbs[0][1]),  # FK via map
                'task_description': wbs[0][3],
                'est_task_id': wbs[0][2],
            })

        return pd.DataFrame(tasks_list)

    def generate_task_schedule_report(self, workorders_df, projects_df, resource_pools_df):
        """
        Generates the task-level schedule report by running the SQL query.

        Args:
            workorders_df, projects_df, resource_pools_df: lookup DataFrames
                matching the joined table names in the query.

        Returns:
            A DataFrame of task rows, or an empty DataFrame on query error.
        """

        # --- 1. Build the in-memory DataFrames ---
        # These local variable names MUST match the table names in the SQL query
        activities = self._build_activities_df()
        task_details = self._build_task_details_df()

        # --- 2. Assign external DFs to local variables ---
        # These also must match the table names in the SQL query
        workorders = workorders_df
        projects = projects_df
        resource_pools = resource_pools_df

        # --- 3. Define and run Query ---
        # pysqldf is defined here so locals() captures the DataFrames above
        pysqldf = lambda q: sqldf(q, locals())

        query = """
            WITH activity_dates AS (
                 SELECT
                    start_date,
                    (julianday(end_date) - julianday(start_date)) AS duration,
                    est_task_id AS task_id,
                    activity_id
                   FROM task_details
                  GROUP BY est_task_id, start_date, end_date, activity_id
            )
             SELECT
                a.activity_description,
                td.task_description,
                ad.start_date,
                ad.duration,
                wo.title AS initiative_name,
                p.project_name,
                rp.description AS team
               FROM activity_dates ad
                 JOIN activities a ON a.activity_id = ad.activity_id
                 JOIN task_details td ON ad.activity_id = td.activity_id
                 JOIN workorders wo ON wo.workorder_id = a.workorder_id
                 JOIN projects p ON wo.project_id = p.project_id
                 JOIN resource_pools rp ON rp.resource_pool_id = wo.resource_pool_id;
        """

        try:
            result_df = pysqldf(query)
            return result_df
        except Exception as e:
            print(f"Error running pandasql query: {e}")
            return pd.DataFrame()

    def generate_activity_schedule_report(self, workorders_df, projects_df, resource_pools_df):
        """
        Generates the activity-level schedule report by running the SQL query.

        Args:
            workorders_df, projects_df, resource_pools_df: lookup DataFrames
                matching the joined table names in the query.

        Returns:
            A DataFrame of activity rows, or an empty DataFrame on query error.
        """

        # --- 1. Build the in-memory DataFrames ---
        # These local variable names MUST match the table names in the SQL query
        activities = self._build_activities_df()
        task_details = self._build_task_details_df()

        # --- 2. Assign external DFs to local variables ---
        # These also must match the table names in the SQL query
        workorders = workorders_df
        projects = projects_df
        resource_pools = resource_pools_df

        # --- 3. Define and run Query ---
        # pysqldf is defined here so locals() captures the DataFrames above
        pysqldf = lambda q: sqldf(q, locals())

        # FIXES vs. the Postgres version of this query:
        #  - pandasql exposes the local DataFrame as bare 'task_details';
        #    the schema-qualified 'fo_itsd_estimate.task_details' cannot
        #    resolve in SQLite and made this query fail every time.
        #  - duration now uses julianday() (as the task report does);
        #    subtracting text dates directly relies on SQLite numeric
        #    coercion and yields meaningless values.
        query = """
        WITH activity_dates AS (
            SELECT min(task_details.start_date) AS start_date,
            (julianday(max(task_details.end_date))
             - julianday(min(task_details.start_date))) AS duration,
            task_details.activity_id
            FROM task_details
            GROUP BY task_details.activity_id
        )
        SELECT a.activity_description,
        ad.start_date,
        ad.duration,
        wo.title AS initiative_name,
        p.project_name,
        rp.description AS team
        FROM activity_dates ad
            JOIN activities a ON a.activity_id = ad.activity_id
            JOIN workorders wo ON wo.workorder_id = a.workorder_id
            JOIN projects p ON wo.project_id = p.project_id
            JOIN resource_pools rp ON rp.resource_pool_id = wo.resource_pool_id;
        """

        try:
            result_df = pysqldf(query)
            return result_df
        except Exception as e:
            print(f"Error running pandasql query: {e}")
            return pd.DataFrame()
 No newline at end of file
+55 −0
Original line number Diff line number Diff line

from parse_res import ResEstimateData
from pandas import DataFrame as df
from pandasql import sqldf

class Baseline(ResEstimateData):
    """Builds baseline name/activity/task tables from a parsed estimate.

    NOTE(review): this class looks mid-refactor ("Unmerged local changes"):
    _get_activities references self._insert_record_and_get_pk and
    self.workorder_id, neither of which is defined on this class or set in
    __init__ (the earlier, commented-out version also inherited PGTools).
    Confirm the intended base classes before using _get_activities.
    """

    def __init__(self, project_id, workorder_title, resource_pool_name, filename):
        ResEstimateData.__init__(self, filename)  # parse the estimate file
        self.project_id = project_id
        self.resource_pool_name = resource_pool_name
        self.workorder_title = workorder_title

    def _get_names(self):
        """Return a DataFrame of unique resource/pool rows.

        Fixed: the previous version nested a dict inside a single 'name'
        column; dicts are unhashable, so DataFrame.drop_duplicates() raised
        TypeError. Records are now flat (full_name, resource_pool_name)
        columns, which also makes the dedupe meaningful.
        """
        records = [{'full_name': name,
                    'resource_pool_name': self.resource_pool_name}
                   for name in self.get_resource_names()]
        return df(records).drop_duplicates()

    def _get_activities(self):
        """Insert each WBS activity and return {est_act_id: activity_id}.

        NOTE(review): depends on self._insert_record_and_get_pk and
        self.workorder_id, which are not defined here — presumably PGTools
        should be a base class and workorder_id set in __init__; verify.
        """
        dct_est_act_id = {}
        for activity in self.get_wbs_data()['Activities']:
            dct_est_act_id[activity[0]] = self._insert_record_and_get_pk(
                'fo_itsd_estimate',
                'activities',
                {'activity_description': activity[1],
                 'est_act_id': activity[0],
                 'workorder_id': self.workorder_id},
                'activity_id')
        return dct_est_act_id

    def _get_tasks(self, dct_est_act_id, dct_names):
        """Build the task-detail DataFrame.

        Args:
            dct_est_act_id: {est_act_id: activity_id}, from _get_activities.
            dct_names: {resource full name: resource_id}.

        Returns:
            DataFrame with one row per person-hours task entry.
        """
        # Hoist loop-invariant parses: the original re-called
        # get_wbs_data() and get_task_dates() on every task iteration.
        wbs_tasks = self.get_wbs_data()['Tasks']
        all_dates = self.get_task_dates()

        tasks_list = []
        for task in self.get_person_hours_task():
            resource_name, task_data = task[0], task[1]
            item = task_data['Item']

            wbs = [w for w in wbs_tasks if w[0] == item]
            task_dates = [{'start_date': d[1], 'end_date': d[2]}
                          for d in all_dates if d[0] == item]

            tasks_list.append({
                'resource_id': dct_names[resource_name],
                'hours': task_data['Hours'],
                'contingency_percent': task_data['Cont %'],
                'start_date': task_dates[0]['start_date'],
                'end_date': task_dates[0]['end_date'],
                'activity_id': dct_est_act_id[wbs[0][1]],
                'task_description': wbs[0][3],
                'est_task_id': wbs[0][2],
            })
        return df(tasks_list)
 No newline at end of file
Loading