提交 c8e38fb4 作者: 宋宏伟

update code

上级 cbc75fe3
...@@ -74,7 +74,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc, ...@@ -74,7 +74,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc,
null as rx_end_datetime, null as rx_end_datetime,
a.dose as dosage_qty, a.dose as dosage_qty,
a.dose_unit_name as dosage_unit, a.dose_unit_name as dosage_unit,
a.frequency_code, a.frequency_code as frequency,
a.frequency_name, a.frequency_name,
a.qty as quantity, a.qty as quantity,
null as quantity_uom, null as quantity_uom,
...@@ -156,7 +156,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc, ...@@ -156,7 +156,7 @@ def etl_prescribing(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc,
a.end_time as rx_end_datetime, a.end_time as rx_end_datetime,
a.dose as dosage_qty, a.dose as dosage_qty,
a.dose_unit_name as dosage_unit, a.dose_unit_name as dosage_unit,
a.frequency_code, a.frequency_code as frequency,
a.frequency_name, a.frequency_name,
a.qty as quantity, a.qty as quantity,
null as quantity_uom, null as quantity_uom,
......
...@@ -50,7 +50,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc ...@@ -50,7 +50,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc
select DISTINCT * from iceberg.cdm.lab_report_result select DISTINCT * from iceberg.cdm.lab_report_result
where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180') where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180')
or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血') or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血')
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120' or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120')
or (test_item_name ~* 'HbA1c|糖化血红蛋白') or (test_item_name ~* 'HbA1c|糖化血红蛋白')
) )
select DISTINCT select DISTINCT
...@@ -125,7 +125,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc ...@@ -125,7 +125,7 @@ def etl_lab_result_cm(d_start_time, d_end_time, d_start_time_utc, d_end_time_utc
select DISTINCT * from iceberg.cdm.lab_report_result select DISTINCT * from iceberg.cdm.lab_report_result
where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180') where (test_item_name ~* 'C肽|C-PR' and test_item_name ~* '空腹|1|60|2|120|3|180')
or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血') or (test_item_name ~* '空腹|FPG|空腹血糖' and test_item_name ~* '血')
or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120' or (test_item_name ~* 'OGTT|耐量|负荷' and test_item_name ~* '2|120')
or (test_item_name ~* 'HbA1c|糖化血红蛋白') or (test_item_name ~* 'HbA1c|糖化血红蛋白')
) )
select DISTINCT select DISTINCT
......
...@@ -4,7 +4,7 @@ options(scipen = 1, digits = 2) ...@@ -4,7 +4,7 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8') options(encoding = 'UTF-8')
library(here) library(here)
source(here("codes","preprocess","R","target_group.R")) source(here("target_group.R"))
# 基线 ------------------------------------------------------------------------- # 基线 -------------------------------------------------------------------------
## 人口学 #### ## 人口学 ####
......
...@@ -4,8 +4,8 @@ options(scipen = 1, digits = 2) ...@@ -4,8 +4,8 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8') options(encoding = 'UTF-8')
library(here) library(here)
source(here("codes","preprocess","R","common.R")) source(here("common.R"))
source(here("codes","preprocess","R","dataset_for_analysis.R")) source(here("dataset_for_analysis.R"))
# load(here("data", "clean", "dataset_for_analysis.RData")) # load(here("data", "clean", "dataset_for_analysis.RData"))
......
...@@ -4,9 +4,9 @@ options(scipen = 1, digits = 2) ...@@ -4,9 +4,9 @@ options(scipen = 1, digits = 2)
options(encoding = 'UTF-8') options(encoding = 'UTF-8')
library(here) library(here)
source(here("codes","preprocess","R","common.R")) source(here("common.R"))
source(here("codes","preprocess","R","wrangling.R")) source(here("wrangling.R"))
patient_clean <- arrow::read_parquet("../../../data/preprocessed/patient_clean.parquet") patient_clean <- arrow::read_parquet("../../../data/preprocessed/patient_clean.parquet")
visit_clean <- arrow::read_parquet("../../../data/preprocessed/visit_clean.parquet") visit_clean <- arrow::read_parquet("../../../data/preprocessed/visit_clean.parquet")
......
...@@ -20,7 +20,7 @@ library(zoo) ...@@ -20,7 +20,7 @@ library(zoo)
# library(eoffice) # library(eoffice)
library(tableone) library(tableone)
library(plotly) library(plotly)
source(here("codes","preprocess","R","common.R")) source(here("common.R"))
# 导入数据 # 导入数据
...@@ -108,7 +108,7 @@ rx_sep <- rx_raw %>% ...@@ -108,7 +108,7 @@ rx_sep <- rx_raw %>%
mutate(visit_id = as.character(visit_id)) %>% mutate(visit_id = as.character(visit_id)) %>%
drop_na(std_rx_desc) %>% drop_na(std_rx_desc) %>%
distinct() %>% distinct() %>%
left_join(select(visit_clean, visit_id, patient_id, patient_type, admission_datetime, specialty = raw_specialty, provider_id)) %>% left_join(select(visit_clean, visit_id, patient_id, patient_type, admission_datetime, specialty, provider_id)) %>%
# mutate(order_datetime = case_when(is.na(order_datetime) ~ admission_datetime, # mutate(order_datetime = case_when(is.na(order_datetime) ~ admission_datetime,
# TRUE ~ order_datetime)) %>% # TRUE ~ order_datetime)) %>%
mutate(order_datetime = case_when(!is.na(rx_start_datetime) ~ rx_start_datetime, mutate(order_datetime = case_when(!is.na(rx_start_datetime) ~ rx_start_datetime,
......
#################################### 数据提取配置 ############################################## #################################### 数据提取配置 ##############################################
# 机构id 传入None 则提全部机构数据 需要传入列表 # 机构id 传入None 则提全部机构数据 需要传入列表
[pv_ids] [pv_ids]
pv_ids = ['320106426090445', '320104466002630', '320106466000838'] # pv_ids = ['320106426090445', '320104466002630', '320106466000838']
# pv_ids = [None] # pv_ids = ['320106426090445']
pv_ids = [None]
# 数据提取范围 需要传入开始时间和结束时间 需要传入列表 # 数据提取范围 需要传入开始时间和结束时间 需要传入列表
[date_ranges] [date_ranges]
date_ranges = [["2021-01-01", "2021-07-01"]] # date_ranges = [["2021-01-01", "2021-07-01"]]
# date_ranges = [["2021-01-01", "2021-07-01"],["2021-07-01", "2022-01-01"],["2022-01-01", "2022-07-01"],["2022-07-01", "2023-01-01"],["2023-01-01", "2023-07-01"],["2023-07-01", "2024-01-01"],["2024-01-01", "2024-07-01"],["2024-07-01", "2024-10-01"]] date_ranges = [["2021-01-01", "2021-07-01"],["2021-07-01", "2022-01-01"],["2022-01-01", "2022-07-01"],["2022-07-01", "2023-01-01"],["2023-01-01", "2023-07-01"],["2023-07-01", "2024-01-01"],["2024-01-01", "2024-07-01"],["2024-07-01", "2024-11-01"]]
# 表格提取 # 表格提取
[tables] [tables]
tables = ['patient', 'visit', 'prescribing', 'diagnosis', 'lab'] tables = ['patient', 'visit', 'prescribing', 'diagnosis', 'lab']
# tables = ['lab']
\ No newline at end of file
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论