Commit a417a44b authored by Powell, Eric's avatar Powell, Eric
Browse files

New files for CPMAI class

parent 14d18adf
Loading
Loading
Loading
Loading
+29 −0
Original line number Diff line number Diff line
%% Mermaid entity-relationship lines linking dimension objects to fact objects.
%% Each line reads: <parent> ||--o{ <child> : "<column(s) named as the join key>".
%% NOTE(review): no `erDiagram` header is present in this fragment - presumably
%% supplied by whatever embeds it; confirm before rendering standalone.
vDimWBSMaster ||--o{ vDimCostObject : "WBSELEMENT = CostObject" 

    vDimCostObject ||--o{ vFactLineItemDetail : "CostObjectSK" 

    vDimCostObject ||--o{ FactPOCommitments : "CostObjectSK" 

    vDimCostObject ||--o{ vFactBudgetReporting : "CostObjectSK" 

    vDimMonthPeriods ||--o{ vFactLineItemDetail : "PeriodSK" 

    vDimMonthPeriods ||--o{ FactPOCommitments : "PeriodSK" 

    vDimMonthPeriods ||--o{ vFactBudgetReporting : "PeriodSK" 

    vDimPurchaseOrder ||--o{ vFactLineItemDetail : "PurchaseOrderItemSK" 

    vDimPurchaseOrder ||--o{ FactPOCommitments : "PurchaseOrderItemSK" 

    CostElementMasterAndEOC_ALLGroups ||--o{ vFactLineItemDetail : "CostElementSK" 

    CostElementMasterAndEOC_ALLGroups ||--o{ FactPOCommitments : "CostElementSK" 

    vFactLineItemDetail ||--o{ vFactLineItemDetailOverhead : "DocumentNumberItemSK" 

    vFactLineItemDetail ||--o{ vTabPersonnel : "LineItemPersonnelSK" 

    FactPOCommitments ||--o{ FactPOCommitmentsOverhead : "PurchaseOrderItemSK, PeriodSK, OriginalCostElementSK, CostObjectSK" 

 
 No newline at end of file
+594 −0
Original line number Diff line number Diff line
/* Initial AI analysis
 * 
 * 
 * 
 */

-- public.v_consolidated_fieldnames
-- One row per entry of consolidated_unique_fields whose description is longer
-- than one character. The comma-separated sourcetable string is split and
-- exposed as a JSON array named source_tables.
-- public.field_definitions (presumably the earlier name of this view - confirm)
-- is dropped first; IF EXISTS keeps the script from aborting when it is absent.
drop view if exists public.field_definitions;
create or replace view public.v_consolidated_fieldnames as
select
    cuf.field_name,
    cuf.description,
    array_to_json(string_to_array(cuf.sourcetable, ',')) as source_tables
from consolidated_unique_fields as cuf
where length(cuf.description) > 1
order by cuf.sourcetable, cuf.field_name;


-- Ad hoc check: fields whose source_tables array holds more than one table.
select
    vcf.field_name,
    vcf.source_tables
from v_consolidated_fieldnames as vcf
where json_array_length(vcf.source_tables) > 1;


-- v_table_with_connections
-- Distinct table names taken from v_consolidated_fieldnames rows whose
-- source_tables array has more than one element, i.e. tables that share at
-- least one field with another table.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists v_table_with_connections;
create or replace view v_table_with_connections as
with shared_field_tables as (
    -- Expand each multi-table field into one row per table it appears in.
    -- No ORDER BY here: the outer DISTINCT does not preserve CTE ordering.
    select
        vcf.field_name,
        json_array_elements_text(vcf.source_tables) as tablename
    from v_consolidated_fieldnames as vcf
    where json_array_length(vcf.source_tables) > 1
)
select distinct sft.tablename
from shared_field_tables as sft;

-- Ad hoc check: crosswalk pairs whose source and target descriptions are equal.
select
    xw.source_field,
    xw.target_field
from updated_expanded_datacube_crosswalk as xw
where xw.source_field_description = xw.target_field_description;

-- Field-name joins: every source_field listed in the expanded datacube crosswalk.
-- (The view name keeps its original spelling because other statements reference it.)
create or replace view v_fildname_joins as
select xw.source_field
from updated_expanded_datacube_crosswalk as xw;

-- Value joins: look through the loaded data for 'example' values that repeat,
-- as a hint that the rows (and so their tables) hold the same data.
-- Emits the fieldname once for every (fieldname, example) pair that occurs on
-- more than one row of consolidated_dataset; NULL examples are ignored.
create or replace view v_value_joins as
select cd.fieldname
from consolidated_dataset as cd
where cd.example is not null
group by cd.fieldname, cd.example
having count(*) > 1;

-- Description joins: every field name that appears on either side of
-- crosswalk_by_description. UNION already removes duplicates.
create or replace view v_description_joins as
select cbd.source_field as fieldname
from crosswalk_by_description as cbd
union
select cbd.target_field
from crosswalk_by_description as cbd;

-- Tables examined: one row per distinct tablename in consolidated_dataset.
create or replace view v_tables as
select cd.tablename
from consolidated_dataset as cd
group by cd.tablename;


-- Side-by-side comparison of the three join-candidate lists
-- (by example value, by field name, by description).
-- The second join keys on coalesce(...): keying on vvj.fieldname alone can never
-- match rows where vvj is NULL-padded, so a field present in the name and
-- description lists but absent from the value list would be split over two rows.
select
    vvj.fieldname,      -- from v_value_joins
    vfj.source_field,   -- from v_fildname_joins
    vdj.fieldname       -- from v_description_joins
from v_value_joins as vvj
full outer join v_fildname_joins as vfj
    on vvj.fieldname = vfj.source_field
full outer join v_description_joins as vdj
    on vdj.fieldname = coalesce(vvj.fieldname, vfj.source_field);



/* Common field analysis with expanded dataset (by schema)
 * 
*/


/* Field names */

-- Common fields by field name - procstats.
-- A field name qualifies when it occurs on more than one row of
-- procstats.fields; every (field, tablename) row for it is returned.
create or replace view procstats.v_common_fields as
with field_counts as (
    select
        pf.fields,
        count(*) as occurrences
    from procstats.fields as pf
    group by pf.fields
)
select
    fc.fields,
    f.tablename
from procstats.fields as f
inner join field_counts as fc
    on fc.fields = f.fields
where fc.occurrences > 1
order by fc.fields;

-- Common fields by field name - reqstats.
-- A field name qualifies when it occurs on more than one row of
-- reqstats.fields; every (field, tablename) row for it is returned.
create or replace view reqstats.v_common_fields as
with field_counts as (
    select
        rf.fields,
        count(*) as occurrences
    from reqstats.fields as rf
    group by rf.fields
)
select
    fc.fields,
    f.tablename
from reqstats.fields as f
inner join field_counts as fc
    on fc.fields = f.fields
where fc.occurrences > 1
order by fc.fields;

-- Common fields by field name - finance_reporting.
-- A field name qualifies when it occurs on more than one row of
-- finance_reporting.fields; every (field, tablename) row for it is returned.
-- The source column is called "field" here and is exposed as "fields" so the
-- view has the same column names as the other schemas' v_common_fields.
-- IF EXISTS lets the script run when the view has not been created yet.
-- NOTE(review): the drop still fails if dependent views already exist.
drop view if exists finance_reporting.v_common_fields;
create or replace view finance_reporting.v_common_fields as
with field_counts as (
    select
        ff.field,
        count(*) as occurrences
    from finance_reporting.fields as ff
    group by ff.field
)
select
    fc.field as fields,
    f.tablename
from finance_reporting.fields as f
inner join field_counts as fc
    on fc.field = f.field
where fc.occurrences > 1
order by f.field;

-- Common fields by field name - financelite.
-- A field name qualifies when it occurs on more than one row of
-- financelite.fields; every (field, tablename) row for it is returned.
-- IF EXISTS lets the script run when the view has not been created yet.
-- NOTE(review): the drop still fails if dependent views already exist.
drop view if exists financelite.v_common_fields;
create or replace view financelite.v_common_fields as
with field_counts as (
    select
        lf.fields,
        count(*) as occurrences
    from financelite.fields as lf
    group by lf.fields
)
select
    fc.fields,
    f.tablename
from financelite.fields as f
inner join field_counts as fc
    on fc.fields = f.fields
where fc.occurrences > 1
order by f.fields;


/* Field Description  */

-- Common fields by field description - procstats.
-- description_counts: number of rows per (field_description, fields) pair,
-- skipping NULL descriptions and descriptions shorter than two characters.
-- Result: fields (with their table) whose description equals that of a
-- differently named field whose own pair occurs more than once.
-- No DISTINCT: one output row per matching pair.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists procstats.v_common_field_description;
create or replace view procstats.v_common_field_description as
with description_counts as (
    select
        pf.field_description,
        pf.fields,
        count(*) as pair_count
    from procstats.fields as pf
    where pf.field_description is not null
      and length(pf.field_description) >= 2
    group by pf.field_description, pf.fields
)
select
    f.fields,
    f.tablename
from procstats.fields as f
inner join description_counts as dc
    on dc.field_description = f.field_description
where dc.pair_count > 1
  and f.fields != dc.fields
order by f.fields;

-- Common fields by field description - reqstats.
-- description_counts: number of rows per (field_description, fields) pair,
-- skipping NULL descriptions and descriptions shorter than two characters.
-- Result: fields (with their table) whose description equals that of a
-- differently named field whose own pair occurs more than once.
-- No DISTINCT: one output row per matching pair.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists reqstats.v_common_field_description;
create or replace view reqstats.v_common_field_description as
with description_counts as (
    select
        rf.field_description,
        rf.fields,
        count(*) as pair_count
    from reqstats.fields as rf
    where rf.field_description is not null
      and length(rf.field_description) >= 2
    group by rf.field_description, rf.fields
)
select
    f.fields,
    f.tablename
from reqstats.fields as f
inner join description_counts as dc
    on dc.field_description = f.field_description
where dc.pair_count > 1
  and f.fields != dc.fields
order by f.fields;

-- Common fields by field description - finance_reporting.
-- description_counts: number of rows per (field_description, field) pair,
-- skipping NULL descriptions and descriptions shorter than two characters.
-- Result: distinct (field, tablename, description) rows where the description
-- equals that of a differently named field whose own pair occurs at least twice.
-- Unlike the other schemas' versions, this view also returns the description.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists finance_reporting.v_common_field_description;
create or replace view finance_reporting.v_common_field_description as
with description_counts as (
    select
        ff.field_description,
        ff.field,
        count(*) as pair_count
    from finance_reporting.fields as ff
    where ff.field_description is not null
      and length(ff.field_description) >= 2
    group by ff.field_description, ff.field
)
select distinct
    f.field as fields,
    f.tablename,
    dc.field_description
from finance_reporting.fields as f
inner join description_counts as dc
    on dc.field_description = f.field_description
where dc.pair_count >= 2
  and f.field != dc.field
order by f.field;


-- Common fields by field description - financelite.
-- description_counts: number of rows per (field_description, fields) pair,
-- skipping NULL descriptions and descriptions shorter than two characters.
-- Result: fields (with their table) whose description equals that of a
-- differently named field whose own pair occurs more than once.
-- No DISTINCT: one output row per matching pair.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists financelite.v_common_field_description;
create or replace view financelite.v_common_field_description as
with description_counts as (
    select
        lf.field_description,
        lf.fields,
        count(*) as pair_count
    from financelite.fields as lf
    where lf.field_description is not null
      and length(lf.field_description) >= 2
    group by lf.field_description, lf.fields
)
select
    f.fields,
    f.tablename
from financelite.fields as f
inner join description_counts as dc
    on dc.field_description = f.field_description
where dc.pair_count > 1
  and f.fields != dc.fields
order by f.fields;


/* Example ****/

-- Common fields by example value - finance_reporting.
-- example_counts: rows per example value, ignoring values of one character or
-- less, the literal 'no values', and 'unknown' in any letter case.
-- Result: every (field, tablename, example) row whose example value occurs on
-- at least two rows of finance_reporting.fields.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists finance_reporting.v_common_field_example;
create or replace view finance_reporting.v_common_field_example as
with example_counts as (
    select
        ff.example,
        count(*) as rec_count
    from finance_reporting.fields as ff
    where length(ff.example) > 1
      and ff.example != 'no values'
      and lower(ff.example) != 'unknown'
    group by ff.example
)
select
    f.field as fields,
    f.tablename,
    ec.example
from finance_reporting.fields as f
inner join example_counts as ec
    on ec.example = f.example
where ec.rec_count >= 2
order by f.field asc;

-- Common fields by example value - procstats.
-- example_counts: rows per example value, ignoring values of one character or
-- less, the literal 'no values', and 'unknown' in any letter case.
-- Result: every (fields, tablename, example) row whose example value occurs on
-- at least two rows of procstats.fields.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists procstats.v_common_field_example;
create or replace view procstats.v_common_field_example as
with example_counts as (
    select
        pf.example,
        count(*) as rec_count
    from procstats.fields as pf
    where length(pf.example) > 1
      and pf.example != 'no values'
      and lower(pf.example) != 'unknown'
    group by pf.example
)
select
    f.fields,
    f.tablename,
    ec.example
from procstats.fields as f
inner join example_counts as ec
    on ec.example = f.example
where ec.rec_count >= 2
order by f.fields asc;

-- Common fields by example value - reqstats.
-- example_counts: rows per example value, ignoring values of one character or
-- less, the literal 'no values', and 'unknown' in any letter case.
-- Result: every (fields, tablename, example) row whose example value occurs on
-- at least two rows of reqstats.fields.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists reqstats.v_common_field_example;
create or replace view reqstats.v_common_field_example as
with example_counts as (
    select
        rf.example,
        count(*) as rec_count
    from reqstats.fields as rf
    where length(rf.example) > 1
      and rf.example != 'no values'
      and lower(rf.example) != 'unknown'
    group by rf.example
)
select
    f.fields,
    f.tablename,
    ec.example
from reqstats.fields as f
inner join example_counts as ec
    on ec.example = f.example
where ec.rec_count >= 2
order by f.fields asc;

-- Common fields by example value - financelite.
-- example_counts: rows per example value, ignoring values of one character or
-- less, the literal 'no values', and 'unknown' in any letter case.
-- Result: every (fields, tablename, example) row whose example value occurs on
-- at least two rows of financelite.fields.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists financelite.v_common_field_example;
create or replace view financelite.v_common_field_example as
with example_counts as (
    select
        lf.example,
        count(*) as rec_count
    from financelite.fields as lf
    where length(lf.example) > 1
      and lf.example != 'no values'
      and lower(lf.example) != 'unknown'
    group by lf.example
)
select
    f.fields,
    f.tablename,
    ec.example
from financelite.fields as f
inner join example_counts as ec
    on ec.example = f.example
where ec.rec_count >= 2
order by f.fields asc;

/* Generate a view per schema comparing the 3 field lists
 * 
 */

-- Finance reporting - field list comparison.
-- Lines up the three "common field" lists (by name, by description, by example)
-- on the field name and scores each row:
--   33  = found by name only
--   66  = found by name plus one of description / example
--   100 = found by all three
-- Rows with no name match (fname_fields is NULL) get a NULL confidence.
-- The example join keys on coalesce(name, description): keying on vcf.fields
-- alone never matches NULL-padded rows, which would split a field found by
-- description and example (but not by name) across two rows.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists finance_reporting.v_table_field_comparison;
create or replace view finance_reporting.v_table_field_comparison as
select distinct
    vcf.fields     as fname_fields,
    vcf.tablename  as fname_table,
    vcfd.fields    as desc_fields,
    vcfd.tablename as desc_table,
    vcfe.fields    as example_fields,
    vcfe.tablename as example_table,
    case
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is null     then 33
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is null     then 66
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is not null then 66
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is not null then 100
    end as confidence
from finance_reporting.v_common_fields as vcf
full outer join finance_reporting.v_common_field_description as vcfd
    on vcfd.fields = vcf.fields
full outer join finance_reporting.v_common_field_example as vcfe
    on vcfe.fields = coalesce(vcf.fields, vcfd.fields);


-- Financelite - field list comparison.
-- Lines up the three "common field" lists (by name, by description, by example)
-- on the field name and scores each row:
--   33  = found by name only
--   66  = found by name plus one of description / example
--   100 = found by all three
-- Rows with no name match (fname_fields is NULL) get a NULL confidence.
-- The example join keys on coalesce(name, description): keying on vcf.fields
-- alone never matches NULL-padded rows, which would split a field found by
-- description and example (but not by name) across two rows.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists financelite.v_table_field_comparison;
create or replace view financelite.v_table_field_comparison as
select distinct
    vcf.fields     as fname_fields,
    vcf.tablename  as fname_table,
    vcfd.fields    as desc_fields,
    vcfd.tablename as desc_table,
    vcfe.fields    as example_fields,
    vcfe.tablename as example_table,
    case
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is null     then 33
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is null     then 66
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is not null then 66
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is not null then 100
    end as confidence
from financelite.v_common_fields as vcf
full outer join financelite.v_common_field_description as vcfd
    on vcfd.fields = vcf.fields
full outer join financelite.v_common_field_example as vcfe
    on vcfe.fields = coalesce(vcf.fields, vcfd.fields);



-- Distil finance_reporting.v_table_field_comparison into the list of unique
-- field / table combinations, whichever of the three methods found them.
-- UNION already removes duplicates. Rows with a NULL fieldname are filtered out
-- because they are only padding from the FULL OUTER JOINs in the comparison
-- view, not real field / table combinations.

create or replace view finance_reporting.v_unique_field_table_list as
with field_table_pairs as (
    select
        fname_fields as fieldname,
        fname_table  as tablename
    from finance_reporting.v_table_field_comparison
    union
    select
        desc_fields,
        desc_table
    from finance_reporting.v_table_field_comparison
    union
    select
        example_fields,
        example_table
    from finance_reporting.v_table_field_comparison
)
select
    ftp.fieldname,
    ftp.tablename
from field_table_pairs as ftp
where ftp.fieldname is not null
order by ftp.fieldname, ftp.tablename;

-- Distil financelite.v_table_field_comparison into the list of unique
-- field / table combinations, whichever of the three methods found them.
-- UNION already removes duplicates. Rows with a NULL fieldname are filtered out
-- because they are only padding from the FULL OUTER JOINs in the comparison
-- view, not real field / table combinations.

create or replace view financelite.v_unique_field_table_list as
with field_table_pairs as (
    select
        fname_fields as fieldname,
        fname_table  as tablename
    from financelite.v_table_field_comparison
    union
    select
        desc_fields,
        desc_table
    from financelite.v_table_field_comparison
    union
    select
        example_fields,
        example_table
    from financelite.v_table_field_comparison
)
select
    ftp.fieldname,
    ftp.tablename
from field_table_pairs as ftp
where ftp.fieldname is not null
order by ftp.fieldname, ftp.tablename;


-- Keep only fields listed for more than one table in the unique field / table
-- list, so each remaining row is a candidate link between tables. Confidence
-- is taken from the comparison view row matched by field name and table.
create or replace view finance_reporting.v_table_common_fields as
with shared_fields as (
    select ul.fieldname
    from finance_reporting.v_unique_field_table_list as ul
    group by ul.fieldname
    having count(*) > 1
)
select distinct
    u.fieldname,
    u.tablename,
    cmp.confidence
from shared_fields as sf
inner join finance_reporting.v_unique_field_table_list as u
    on u.fieldname = sf.fieldname
inner join finance_reporting.v_table_field_comparison as cmp
    on cmp.fname_fields = u.fieldname
   and cmp.fname_table = u.tablename;


-- ProcStats - field list comparison.
-- Lines up the three "common field" lists (by name, by description, by example)
-- on the field name and scores each row:
--   33  = found by name only
--   66  = found by name plus one of description / example
--   100 = found by all three
-- Rows with no name match (fname_fields is NULL) get a NULL confidence.
-- The example join keys on coalesce(name, description): keying on vcf.fields
-- alone never matches NULL-padded rows, which would split a field found by
-- description and example (but not by name) across two rows.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists procstats.v_table_field_comparison;
create or replace view procstats.v_table_field_comparison as
select distinct
    vcf.fields     as fname_fields,
    vcf.tablename  as fname_table,
    vcfd.fields    as desc_fields,
    vcfd.tablename as desc_table,
    vcfe.fields    as example_fields,
    vcfe.tablename as example_table,
    case
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is null     then 33
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is null     then 66
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is not null then 66
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is not null then 100
    end as confidence
from procstats.v_common_fields as vcf
full outer join procstats.v_common_field_description as vcfd
    on vcfd.fields = vcf.fields
full outer join procstats.v_common_field_example as vcfe
    on vcfe.fields = coalesce(vcf.fields, vcfd.fields);

-- Distil procstats.v_table_field_comparison into the list of unique
-- field / table combinations, whichever of the three methods found them.
-- UNION already removes duplicates. Rows with a NULL fieldname are filtered out
-- because they are only padding from the FULL OUTER JOINs in the comparison
-- view, not real field / table combinations.

create or replace view procstats.v_unique_field_table_list as
with field_table_pairs as (
    select
        fname_fields as fieldname,
        fname_table  as tablename
    from procstats.v_table_field_comparison
    union
    select
        desc_fields,
        desc_table
    from procstats.v_table_field_comparison
    union
    select
        example_fields,
        example_table
    from procstats.v_table_field_comparison
)
select
    ftp.fieldname,
    ftp.tablename
from field_table_pairs as ftp
where ftp.fieldname is not null
order by ftp.fieldname, ftp.tablename;


-- Keep only fields listed for more than one table in the unique field / table
-- list, so each remaining row is a candidate link between tables. Confidence
-- is taken from the comparison view row matched by field name and table.
create or replace view procstats.v_table_common_fields as
with shared_fields as (
    select ul.fieldname
    from procstats.v_unique_field_table_list as ul
    group by ul.fieldname
    having count(*) > 1
)
select distinct
    u.fieldname,
    u.tablename,
    cmp.confidence
from shared_fields as sf
inner join procstats.v_unique_field_table_list as u
    on u.fieldname = sf.fieldname
inner join procstats.v_table_field_comparison as cmp
    on cmp.fname_fields = u.fieldname
   and cmp.fname_table = u.tablename;

-- ReqStats - field list comparison.
-- Lines up the three "common field" lists (by name, by description, by example)
-- on the field name and scores each row:
--   33  = found by name only
--   66  = found by name plus one of description / example
--   100 = found by all three
-- Rows with no name match (fname_fields is NULL) get a NULL confidence.
-- The example join keys on coalesce(name, description): keying on vcf.fields
-- alone never matches NULL-padded rows, which would split a field found by
-- description and example (but not by name) across two rows.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists reqstats.v_table_field_comparison;
create or replace view reqstats.v_table_field_comparison as
select distinct
    vcf.fields     as fname_fields,
    vcf.tablename  as fname_table,
    vcfd.fields    as desc_fields,
    vcfd.tablename as desc_table,
    vcfe.fields    as example_fields,
    vcfe.tablename as example_table,
    case
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is null     then 33
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is null     then 66
        when vcf.fields is not null and vcfd.fields is null     and vcfe.fields is not null then 66
        when vcf.fields is not null and vcfd.fields is not null and vcfe.fields is not null then 100
    end as confidence
from reqstats.v_common_fields as vcf
full outer join reqstats.v_common_field_description as vcfd
    on vcfd.fields = vcf.fields
full outer join reqstats.v_common_field_example as vcfe
    on vcfe.fields = coalesce(vcf.fields, vcfd.fields);

-- Distil reqstats.v_table_field_comparison into the list of unique
-- field / table combinations, whichever of the three methods found them.
-- UNION already removes duplicates. Rows with a NULL fieldname are filtered out
-- because they are only padding from the FULL OUTER JOINs in the comparison
-- view, not real field / table combinations.

create or replace view reqstats.v_unique_field_table_list as
with field_table_pairs as (
    select
        fname_fields as fieldname,
        fname_table  as tablename
    from reqstats.v_table_field_comparison
    union
    select
        desc_fields,
        desc_table
    from reqstats.v_table_field_comparison
    union
    select
        example_fields,
        example_table
    from reqstats.v_table_field_comparison
)
select
    ftp.fieldname,
    ftp.tablename
from field_table_pairs as ftp
where ftp.fieldname is not null
order by ftp.fieldname, ftp.tablename;

-- Keep only fields listed for more than one table in the unique field / table
-- list, so each remaining row is a candidate link between tables. Confidence
-- is taken from the comparison view row matched by field name and table.
create or replace view reqstats.v_table_common_fields as
with shared_fields as (
    select ul.fieldname
    from reqstats.v_unique_field_table_list as ul
    group by ul.fieldname
    having count(*) > 1
)
select distinct
    u.fieldname,
    u.tablename,
    cmp.confidence
from shared_fields as sf
inner join reqstats.v_unique_field_table_list as u
    on u.fieldname = sf.fieldname
inner join reqstats.v_table_field_comparison as cmp
    on cmp.fname_fields = u.fieldname
   and cmp.fname_table = u.tablename;


-- Keep only fields listed for more than one table in the unique field / table
-- list, so each remaining row is a candidate link between tables. Confidence
-- is taken from the comparison view row matched by field name and table.
create or replace view financelite.v_table_common_fields as
with shared_fields as (
    select ul.fieldname
    from financelite.v_unique_field_table_list as ul
    group by ul.fieldname
    having count(*) > 1
)
select distinct
    u.fieldname,
    u.tablename,
    cmp.confidence
from shared_fields as sf
inner join financelite.v_unique_field_table_list as u
    on u.fieldname = sf.fieldname
inner join financelite.v_table_field_comparison as cmp
    on cmp.fname_fields = u.fieldname
   and cmp.fname_table = u.tablename;

/* Cross-Schema comparisons   */


/* Tablename analysis */

-- public.v_common_tables
-- Table names that appear in more than one subject area's table list.
-- tabledata tags every table name with its subject area; UNION removes
-- repeats, so each (tablename, subjectarea) pair is counted once.
-- The output column keeps its original spelling ("occurences") for callers.
-- IF EXISTS lets the script run when the view has not been created yet.
drop view if exists public.v_common_tables;
create or replace view public.v_common_tables as
with tabledata as (
    select tablename, 'finance' as subjectarea
    from finance_reporting.bi_finance_dw_reporting_tables
    union
    select tablename, 'procstats' as subjectarea
    from procstats.bi_procstats_tables
    union
    select tablename, 'reqstats' as subjectarea
    from reqstats.bi_req_stats_dw_tables
    union
    select tablename, 'travelmod' as subjectarea
    from travel_model.travel_reporting_model_tables
    union
    select tablename, 'financelite' as subjectarea
    from financelite.financelite_tables
),
stats as (
    -- Number of subject areas each table name appears in.
    select
        td.tablename,
        count(*) as occurences
    from tabledata as td
    group by td.tablename
)
select
    st.tablename,
    st.occurences
from stats as st
where st.occurences > 1
order by st.occurences desc;

/** Cross Dimension Joins */
-- Stacks, for each of the four schemas, the rows of its fields table that also
-- appear in that schema's v_unique_field_table_list (matched on field name and
-- table name), tagged with a dimension label. finance_reporting's "field"
-- column is exposed as "fields" to line up with the other schemas.
create or replace view public.v_commonfields as
select
    'BI Finance Reporting DW' as dimension,
    f.field as fields,
    f.tablename,
    f.example,
    f.field_description
from finance_reporting.fields as f
inner join finance_reporting.v_unique_field_table_list as vuftl
    on f.field = vuftl.fieldname
   and f.tablename = vuftl.tablename
union
select
    'BI ProcStats DW' as dimension,
    f.fields,
    f.tablename,
    f.example,
    f.field_description
from procstats.fields as f
inner join procstats.v_unique_field_table_list as vuftl
    on f.fields = vuftl.fieldname
   and f.tablename = vuftl.tablename
union
select
    'BI ReqStats DW' as dimension,
    f.fields,
    f.tablename,
    f.example,
    f.field_description
from reqstats.fields as f
inner join reqstats.v_unique_field_table_list as vuftl
    on f.fields = vuftl.fieldname
   and f.tablename = vuftl.tablename
union
select
    'Financelite' as dimension,
    f.fields,
    f.tablename,
    f.example,
    f.field_description
from financelite.fields as f
inner join financelite.v_unique_field_table_list as vuftl
    on f.fields = vuftl.fieldname
   and f.tablename = vuftl.tablename
;


-- Fields that occur in more than one dimension of public.v_commonfields.
-- d_count is the number of distinct dimensions the field name appears in.
create or replace view public.vw_fieldnames_across_faces as
with dimension_fields as (
    -- One row per (dimension, field name) pair.
    select distinct
        vc.dimension,
        vc.fields
    from public.v_commonfields as vc
),
counts as (
    select
        count(*) as d_count,
        df.fields
    from dimension_fields as df
    group by df.fields
)
select
    c.d_count,
    c.fields
from counts as c
where c.d_count > 1
order by c.d_count desc, c.fields asc;
+166 −0

File added.

Preview size limit exceeded, changes collapsed.

+3.35 MiB

File added.

No diff preview for this file type.

+10 KiB

File added.

No diff preview for this file type.

Loading