Commit b8707ee2 authored by zihanl's avatar zihanl
Browse files

change directory name to msdp

parent 4e48efdf
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line

# Multi-Stage Prompting for Knowledgeable Dialogue Generation

This directory contains all the scripts for multi-stage prompting for knowledgeable dialogue generation, covering data preparation as well as knowledge and response generation. More details are available in the [`knowledgeable task directory`](../../tasks/knwl_dialo).
This directory contains all the scripts for multi-stage prompting for knowledgeable dialogue generation, covering data preparation as well as knowledge and response generation. More details are available in the [`knowledgeable task directory`](../../tasks/msdp).
+8 −8
Original line number Diff line number Diff line
@@ -13,13 +13,13 @@ WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>

# We provide examples for processing the raw data from Wizard of Wikipedia
# Processing the train dataset (train.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/train.json \
        --processed_file ${WOW_DATA_FOLDER}/train_processed.txt

# Processing the test seen dataset (test_random_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_random_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
@@ -27,7 +27,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt

# Processing the test unseen dataset (test_topic_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
@@ -37,7 +37,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \

# We provide the following script to process the raw data from Wizard of Internet
# Processing the test dataset (test.jsonl)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_woi_dataset \
        --raw_file ${WOI_DATA_FOLDER}/test.jsonl \
        --processed_file ${WOI_DATA_FOLDER}/test_processed.txt \
@@ -48,7 +48,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# Get the knowledge generation prompts for each test dataset in WoW and WoI
MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL> 
# WoW test seen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -57,7 +57,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --data_type wow_seen

# WoW test unseen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -66,7 +66,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --data_type wow_unseen

# WoI
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOI_DATA_FOLDER}/test_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -76,7 +76,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \


# Get the response generation prompts (can be applied for all the test datasets)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_resp_gen_prompts \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt
+2 −2
Original line number Diff line number Diff line
@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task KNWL-DIALO-EVAL-F1 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}

+4 −4
Original line number Diff line number Diff line
@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \ 
        (e.g., /testseen_response_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task KNWL-DIALO-EVAL-F1 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}

@@ -37,14 +37,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task KNWL-DIALO-EVAL-F1 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}

+1 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \
PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \
        (e.g., /testseen_processed_with_generated_knowledge.txt)

python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
        --func prepare_input \
        --test_file ${TEST_FILE} \
        --knowledge_gen_file ${KNOWLEDGE_FILE} \
Loading