|
import os
import subprocess
import tempfile
from typing import List

import datasets
import evaluate

from pycocoevalcap.cider.cider_scorer import CiderScorer
|
|
|
_DESCRIPTION = """ |
|
The CIDEr (Consensus-based Image Description Evaluation) metric is used to evaluate the quality of image captions generated by models in image captioning tasks. |
|
It measures how well the generated caption matches human-written reference captions by considering both the frequency and the relevance of words or phrases. |
|
Here is the formula for the CIDEr metric in LaTeX code: |
|
|
|
$ |
|
\\text{CIDEr}(c_i, C) = \\frac{1}{N} \\sum_{n=1}^{N} w_n \\cdot \\frac{\\sum_{j=1}^{m} \\text{IDF}(g_j) \\cdot \\text{TF}(g_j, c_i)}{\\sum_{j=1}^{m} \\text{IDF}(g_j) \\cdot \\text{TF}(g_j, C)} |
|
$ |
|
|
|
where: |
|
- $ c_i $ is the candidate caption, |
|
- $ C $ is the set of reference captions, |
|
- $ N $ is the number of n-grams (typically 1 to 4), |
|
- $ w_n $ is the weight for the n-gram, |
|
- $ g_j $ represents the j-th n-gram, |
|
- $ \\text{TF}(g_j, c_i) $ is the term frequency of the n-gram $ g_j $ in the candidate caption $ c_i $, |
|
- $ \\text{TF}(g_j, C) $ is the term frequency of the n-gram $ g_j $ in the reference captions $ C $, |
|
- $ \\text{IDF}(g_j) $ is the inverse document frequency of the n-gram $ g_j $. |
|
""" |
|
|
|
|
|
_KWARGS_DESCRIPTION = """ |
|
Args: |
|
predictions (`list` of `str`): Predicted captions. |
|
references (`list` of `str` lists): Ground truth captions. |
|
n (int, defaults to 4): Number of ngrams for which (ngram) representation is calculated. |
|
sigma (float, defaults to 6.0): The standard deviation parameter for gaussian penalty. |
|
|
|
Returns: |
|
CIDEr (`float`): CIDEr value. Minimum possible value is 0. Maximum possible value is 100. |
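
Examples:
    A minimal sketch; it assumes this script is saved locally as ``cider.py``
    (that load path is an assumption, not a published Hub id) and that Java is
    available on PATH for the Stanford PTB tokenizer.

    >>> cider = evaluate.load("./cider.py")  # doctest: +SKIP
    >>> predictions = ["a man is riding a horse on the beach"]
    >>> references = [["a man rides a horse on the beach", "a person is riding a horse near the ocean"]]
    >>> results = cider.compute(predictions=predictions, references=references)  # doctest: +SKIP
    >>> print(results["CIDEr"])  # doctest: +SKIP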
|
|
|
""" |
|
|
|
|
|
_CITATION = """ |
|
@inproceedings{vedantam2015cider, |
|
title={Cider: Consensus-based image description evaluation}, |
|
author={Vedantam, Ramakrishna and Lawrence Zitnick, C and Parikh, Devi}, |
|
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, |
|
pages={4566--4575}, |
|
year={2015} |
|
} |
|
""" |
|
|
|
_URLS = {
    "stanford-corenlp": "https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/3.4.1/stanford-corenlp-3.4.1.jar"
}
|
|
|
|
|
def tokenize(tokenizer_path: str, predictions: List[str], references: List[List[str]]):
    # Punctuation tokens dropped after tokenization, following the original
    # coco-caption PTBTokenizer wrapper.
    PUNCTUATIONS = [
        "''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
        ".", "?", "!", ",", ":", "-", "--", "...", ";",
    ]

    cmd = [
        "java",
        "-cp",
        tokenizer_path,
        "edu.stanford.nlp.process.PTBTokenizer",
        "-preserveLines",
        "-lowerCase",
    ]

    # Flatten predictions followed by all references into one sentence per line.
    sentences = "\n".join(
        s.replace("\n", " ")
        for s in predictions + [ref for refs in references for ref in refs]
    )

    # The tokenizer reads its input from a temporary file passed on the
    # command line (its stdin is not piped).
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(sentences.encode())

    try:
        cmd.append(f.name)
        p_tokenizer = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        token_lines = p_tokenizer.communicate()[0].decode()
        lines = [
            " ".join(w for w in line.rstrip().split(" ") if w not in PUNCTUATIONS)
            for line in token_lines.split("\n")
        ]
    finally:
        os.remove(f.name)

    # Split the flat list of tokenized lines back into predictions and
    # per-sample reference lists, preserving the original order.
    pred_size = len(predictions)
    ref_sizes = [len(refs) for refs in references]

    predictions = lines[:pred_size]
    start = pred_size
    references = []
    for size in ref_sizes:
        references.append(lines[start : start + size])
        start += size

    return predictions, references
|
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class CIDEr(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string", id="sequence"),
                    "references": datasets.Sequence(
                        datasets.Value("string", id="sequence"), id="references"
                    ),
                }
            ),
            reference_urls=[
                "https://github.com/salaniz/pycocoevalcap",
                "https://github.com/tylin/coco-caption",
            ],
        )

    def _download_and_prepare(self, dl_manager):
        # Download the Stanford CoreNLP jar that provides the PTB tokenizer.
        self.tokenizer_path = dl_manager.download(_URLS["stanford-corenlp"])

    def _compute(self, predictions, references, n=4, sigma=6.0):
        predictions, references = tokenize(
            self.tokenizer_path, predictions, references
        )
        # CiderScorer's first two positional parameters are (test, refs),
        # so n and sigma must be passed by keyword.
        scorer = CiderScorer(n=n, sigma=sigma)
        for pred, refs in zip(predictions, references):
            scorer += (pred, refs)
        score, _ = scorer.compute_score()
        return {"CIDEr": score}
|
|