fswair · February 2, 2025 23:11
diff --git a/grpo_demo.py b/grpo_demo.py
 # thoughts

 # 1) if a value is used only once, do not bind it to a variable
 # 2) add detailed type annotations to make the code easier to understand and to get better editor support
 # 3) do not use four separate f-strings; you can combine them all into one f-string expression
 # # 3.1) if you are not changing the sep parameter, you do not need to pass the strings one by one
 # 4) you can use an f-string format spec to repeat a character

 def correctness_reward_func2(prompts: List[List[Mapping[str, str]]], completions: List[List[Mapping[str, str]]], answer: List[Any], **kwargs) -> List[float]:
    """Reward each completion with 2.0 when its extracted answer matches, else 0.0.

    Args:
        prompts: One conversation per sample; the ``'content'`` of the last
            message of the first conversation is printed for debugging.
        completions: One list of messages per sample; only the ``'content'``
            of the first message of each completion is scored.
        answer: Reference answers, compared with ``==`` against the text
            returned by ``extract_xml_answer`` (defined elsewhere in the file).
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        One score per (extracted response, answer) pair: ``2.0`` on an exact
        match and ``0.0`` otherwise. An empty batch yields an empty list.
    """
    responses = [completion[0]['content'] for completion in completions]
    extracted_responses = [extract_xml_answer(r) for r in responses]
    # Only log the first sample when there is one; indexing [0] on an empty
    # batch would raise IndexError before any score could be returned.
    if prompts and answer and responses:
        # {'':->20} pads an empty string to width 20 with '-', i.e. a rule of 20 dashes.
        print(f"{'':->20} Question:\n{prompts[0][-1]['content']}\nAnswer:\n{answer[0]}\nResponse:\n{responses[0]}\nExtracted:\n{extracted_responses[0]}")
    return [2.0 if r == a else 0.0 for r, a in zip(extracted_responses, answer)]
	# thoughts

	# 1) if a value is used only once, do not bind it to a variable
	# 2) add detailed type annotations to make the code easier to understand and to get better editor support
	# 3) do not use four separate f-strings; you can combine them all into one f-string expression
	# # 3.1) if you are not changing the sep parameter, you do not need to pass the strings one by one
	# 4) you can use an f-string format spec to repeat a character

	def correctness_reward_func2(prompts: List[List[Mapping[str, str]]], completions: List[List[Mapping[str, str]]], answer: List[Any], **kwargs) -> List[float]:
		"""Reward each completion with 2.0 when its extracted answer matches, else 0.0.

		Args:
			prompts: One conversation per sample; the ``'content'`` of the last
				message of the first conversation is printed for debugging.
			completions: One list of messages per sample; only the ``'content'``
				of the first message of each completion is scored.
			answer: Reference answers, compared with ``==`` against the text
				returned by ``extract_xml_answer`` (defined elsewhere in the file).
			**kwargs: Accepted for call compatibility and ignored.

		Returns:
			One score per (extracted response, answer) pair: ``2.0`` on an exact
			match and ``0.0`` otherwise. An empty batch yields an empty list.
		"""
		responses = [completion[0]['content'] for completion in completions]
		extracted_responses = [extract_xml_answer(r) for r in responses]
		# Only log the first sample when there is one; indexing [0] on an empty
		# batch would raise IndexError before any score could be returned.
		if prompts and answer and responses:
			# {'':->20} pads an empty string to width 20 with '-', i.e. a rule of 20 dashes.
			print(f"{'':->20} Question:\n{prompts[0][-1]['content']}\nAnswer:\n{answer[0]}\nResponse:\n{responses[0]}\nExtracted:\n{extracted_responses[0]}")
		return [2.0 if r == a else 0.0 for r, a in zip(extracted_responses, answer)]
No results found