6 changes: 3 additions & 3 deletions .github/workflows/pr-stage-check.yml
@@ -20,7 +20,7 @@ jobs:
      matrix:
        python-version: ['3.10']
        include:
-         - torch: 2.0.0
+         - torch: 2.5.1
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
@@ -30,7 +30,7 @@ jobs:
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Install PyTorch
-       run: pip install torch==${{matrix.torch}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
+       run: pip install torch==${{matrix.torch}} -f https://download.pytorch.org/whl/cpu/torch_stable.html
      - name: Install system dependencies
        run: |
          sudo sed -i '$ a deb http://th.archive.ubuntu.com/ubuntu jammy main' /etc/apt/sources.list
@@ -106,7 +106,7 @@ jobs:
      - name: Upgrade pip
        run: python -m pip install pip --upgrade
      - name: Install PyTorch
-       run: pip install torch==2.0.0+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
+       run: pip install torch==2.5.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
      - name: Install opencompass dependencies
        run: |
          pip install -r requirements.txt
New file (path not shown in this extract): ARC Prize public evaluation dataset config
@@ -0,0 +1,56 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.arc_prize_public_evaluation import ARCPrizeDataset, ARCPrizeEvaluator


# The system_prompt defines the initial instructions for the model,
# setting the context for solving ARC tasks.
system_prompt = '''You are a puzzle solving wizard. You are given a puzzle from the abstraction and reasoning corpus developed by Francois Chollet.'''

# The user message template includes placeholders for the training examples and the test input,
# guiding the model to learn the underlying rule and apply it to the given puzzle.
user_message_template = '''Here are the example input and output pairs from which you should learn the underlying rule to later predict the output for the given test input:
----------------------------------------
{training_data}
----------------------------------------
Now, solve the following puzzle based on its input grid by applying the rules you have learned from the training data:
----------------------------------------
[{{'input': {input_test_data}, 'output': [[]]}}]
----------------------------------------
What is the output grid? Only provide the output grid in the form as in the example input and output pairs. Do not provide any additional information:'''


arc_prize_public_evaluation_reader_cfg = dict(
    input_columns=['training_data', 'input_test_data'],
    output_column='output_test_data'
)

arc_prize_public_evaluation_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt=system_prompt),
                dict(role='HUMAN', prompt=user_message_template),
            ],
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer)
)

arc_prize_public_evaluation_eval_cfg = dict(
    evaluator=dict(type=ARCPrizeEvaluator)
)

arc_prize_public_evaluation_datasets = [
    dict(
        abbr='ARC_Prize_Public_Evaluation',
        type=ARCPrizeDataset,
        path='opencompass/arc_prize_public_evaluation',
        reader_cfg=arc_prize_public_evaluation_reader_cfg,
        infer_cfg=arc_prize_public_evaluation_infer_cfg,
        eval_cfg=arc_prize_public_evaluation_eval_cfg
    )
]
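
The dataset list name, arc_prize_public_evaluation_datasets, is what a downstream run config picks up. A minimal sketch (not part of the PR) of wiring it into an evaluation config, assuming the file above is importable under read_base(); the import path is hypothetical, since the new file's location is not shown in this extract:

from mmengine.config import read_base

with read_base():
    # hypothetical module path -- substitute the real location of the config above
    from .arc_prize_public_evaluation_gen import arc_prize_public_evaluation_datasets

# OpenCompass collects and runs every entry placed in the top-level `datasets` list
datasets = [*arc_prize_public_evaluation_datasets]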
New file (path not shown in this extract): GAOKAO-Bench dataset config
@@ -0,0 +1,45 @@
import os
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaokaoBenchDataset
from mmengine.config import read_base

with read_base():
    from .GaokaoBench_prompts import MCQ_prompts, FBQ_prompts

GaokaoBench_datasets = []
for folder, prompts in [
    ('Multiple-choice_Questions', MCQ_prompts),
    ('Fill-in-the-blank_Questions', FBQ_prompts),
]:
    for p in prompts:
        reader_cfg = {
            'input_columns': ['question'],
            'output_column': 'answer',
        }
        infer_cfg = {
            'ice_template': {
                'type': PromptTemplate,
                'template': {'round': [{'role': 'HUMAN', 'prompt': p['prefix_prompt'] + '{question}'}]},
                'ice_token': '</E>',
            },
            'retriever': {'type': ZeroRetriever},
            'inferencer': {'type': GenInferencer},
        }
        eval_cfg = {
            'evaluator': {'type': 'GaokaoBenchEvaluator' + '_' + p['type']},
            'pred_role': 'BOT',
        }
        _base_path = 'opencompass/GAOKAO-BENCH'
        dataset = {
            'type': GaokaoBenchDataset,
            'abbr': 'GaokaoBench_' + p['keyword'],
            'path': _base_path,
            'filename': '/' + folder + '/' + p['keyword'] + '.json',
            'name': p['keyword'],
            'reader_cfg': reader_cfg,
            'infer_cfg': infer_cfg,
            'eval_cfg': eval_cfg,
        }
        GaokaoBench_datasets.append(dataset)
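
Each entry of MCQ_prompts / FBQ_prompts must carry at least the 'type', 'keyword', and 'prefix_prompt' keys read by the loop above. A hypothetical entry and the strings the loop would derive from it, shown only to make the wiring concrete (the values are illustrative, not taken from the PR):

# Hypothetical prompt entry with the keys the loop reads; values are illustrative.
p = {
    'type': 'single_choice',               # selects evaluator 'GaokaoBenchEvaluator_single_choice'
    'keyword': '2010-2022_Math_II_MCQs',   # hypothetical keyword
    'prefix_prompt': 'Answer the following multiple-choice question.\n',  # hypothetical instruction prefix
}

# For this entry the loop would build, roughly:
# abbr      -> 'GaokaoBench_2010-2022_Math_II_MCQs'
# filename  -> '/Multiple-choice_Questions/2010-2022_Math_II_MCQs.json'
# evaluator -> {'type': 'GaokaoBenchEvaluator_single_choice'}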
New file (path not shown in this extract): MathBench dataset config
@@ -0,0 +1,81 @@
from mmengine.config import read_base
from copy import deepcopy
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
from opencompass.datasets import MathBenchDataset, math_postprocess_v2
from opencompass.utils.text_postprocessors import first_option_postprocess

with read_base():
    from .mathbench_prompt import zero_shot_prompts, few_shot_prompts, mathbench_sets

# Maximum number of few-shot examples for this dataset is 4
num_shot = 0
# Whether to generate a reasoning path (single-choice questions only)
with_reasoning = True
# Whether to use circular evaluation
with_circular_eval = True
# Whether to use PPL mode for single-choice questions
use_ppl_single_choice = False

assert 0 <= num_shot <= 4
if num_shot == 0:
    prompts = zero_shot_prompts
else:
    # keep only the last (2 * num_shot + 2) prompt rounds of each few-shot prompt
    prompts = {name: p[-2 * num_shot - 2:] for name, p in few_shot_prompts.items()}

mathbench_datasets = []
for _split in mathbench_sets:
    for _name in mathbench_sets[_split]:
        if 'single_choice' in _name:
            if with_reasoning:
                template_round = prompts[_name + '_with_reasoning']
            else:
                template_round = prompts[_name]
        else:
            template_round = prompts[_name]

        if 'single_choice' in _name:
            pred_postprocessor = dict(type=first_option_postprocess, options='ABCD')
        else:
            pred_postprocessor = dict(type=math_postprocess_v2)

        if 'single_choice' in _name and with_circular_eval:
            evaluator = dict(type=CircularEvaluator)
        else:
            evaluator = dict(type=AccEvaluator)

        # assemble the final config
        mathbench_reader_cfg = dict(input_columns=['question'], output_column='answer')
        if use_ppl_single_choice and 'single_choice' in _name and not with_reasoning:
            template = {}
            for answer in ['A', 'B', 'C', 'D']:
                one_template_round = deepcopy(template_round)
                one_template_round['round'][-1]['prompt'] = one_template_round['round'][-1]['prompt'].format(answer=answer)
                template[answer] = dict(round=one_template_round)
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=template),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=PPLInferencer),
            )
        else:
            mathbench_infer_cfg = dict(
                prompt_template=dict(type=PromptTemplate, template=dict(round=template_round)),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer),
            )
        mathbench_eval_cfg = dict(evaluator=evaluator, pred_postprocessor=pred_postprocessor)

        mathbench_datasets.append(
            dict(
                abbr='mathbench-' + _split + '-' + _name,
                type=MathBenchDataset,
                path=f'data/mathbench_v1/{_split}',
                name=_name,
                with_circular=with_circular_eval,
                reader_cfg=mathbench_reader_cfg,
                infer_cfg=mathbench_infer_cfg,
                eval_cfg=mathbench_eval_cfg,
            )
        )
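
When with_circular_eval is enabled for the single-choice sets, CircularEvaluator scores each question across rotated orderings of the A-D options rather than a single fixed ordering. A rough sketch of that idea, not OpenCompass's actual implementation:

# Sketch of circular evaluation (illustrative only, not the CircularEvaluator source):
# ask the same question once per rotation of the options and, under the strict
# circular metric, count it correct only if every rotation is answered correctly.
def circular_correct(ask_model, question, options, answer_idx):
    """ask_model(question, options) -> index of the option the model picks."""
    n = len(options)
    for k in range(n):
        rotated = options[k:] + options[:k]   # rotate options by k positions
        target = (answer_idx - k) % n         # where the gold option lands after rotation
        if ask_model(question, rotated) != target:
            return False
    return True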