| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091 |
# Part of the implementation is borrowed and modified from PGL-SUM,
# publicly available at https://github.com/e-apostolidis/PGL-SUM
- from typing import Dict
- import numpy as np
- from modelscope.metainfo import Metrics
- from modelscope.models.cv.video_summarization.summarizer import \
- generate_summary
- from modelscope.utils.registry import default_group
- from .base import Metric
- from .builder import METRICS, MetricKeys
def evaluate_summary(predicted_summary, user_summary, eval_method):
    """ Compare the predicted summary with the user defined one(s).

    The predicted summary and every user summary are zero-padded to a common
    length, then an F-score (expressed in percent) is computed against each
    user summary and reduced to a single value.

    A comparison in which the predicted summary or the user summary selects
    no frames at all scores 0 instead of producing NaN from a 0/0 division.

    :param ndarray predicted_summary: The generated summary from our model.
    :param ndarray user_summary: The user defined ground truth summaries (or summary),
        indexed as [user, frame].
    :param str eval_method: The proposed evaluation method; either 'max' (SumMe) or 'avg' (TVSum).
        Any value other than 'max' is treated as 'avg'.
    :return: The reduced fscore based on the eval_method
    """
    n_users, n_user_frames = user_summary.shape[0], user_summary.shape[1]
    max_len = max(len(predicted_summary), n_user_frames)

    # Pad both masks to the same length so they can be AND-ed element-wise.
    S = np.zeros(max_len, dtype=int)
    G = np.zeros(max_len, dtype=int)
    S[:len(predicted_summary)] = predicted_summary

    # The predicted mask is the same for every user; total it only once.
    n_predicted = S.sum()

    f_scores = []
    for user in range(n_users):
        # Only the first n_user_frames entries are rewritten; the padding
        # tail of G stays zero across iterations.
        G[:n_user_frames] = user_summary[user]
        n_overlapped = (S & G).sum()
        n_user = G.sum()

        # Compute precision, recall, f-score. An empty mask on either side
        # would be a 0/0 division (NaN), so it is scored as 0 instead.
        precision = n_overlapped / n_predicted if n_predicted else 0.0
        recall = n_overlapped / n_user if n_user else 0.0
        if precision + recall == 0:
            f_scores.append(0.0)
        else:
            f_score = 2 * precision * recall * 100 / (precision + recall)
            f_scores.append(float(f_score))

    if eval_method == 'max':
        return max(f_scores)
    return sum(f_scores) / len(f_scores)
def calculate_f_score(outputs: Dict, inputs: Dict):
    """Compute the F-score of one model output against its ground truth.

    The frame scores are read from ``outputs['scores']``; the user summaries,
    change points, frame count and positions are read from ``inputs``. A
    summary is built with ``generate_summary`` and compared to the user
    summaries with ``evaluate_summary`` using the 'avg' reduction.

    :param Dict outputs: Model outputs; must contain 'scores'.
    :param Dict inputs: Model inputs; must contain 'user_summary',
        'change_points', 'n_frames' and 'positions'.
    :return: The f-score returned by ``evaluate_summary``.
    """
    # Drop dim 0 (if it has size 1) and hand the scores over as a Python list.
    frame_scores = outputs['scores'].squeeze(0).cpu().numpy().tolist()

    def _first_item(key):
        # Each input is indexed with [0] — presumably a batch of size one;
        # verify against the data loader.
        return inputs[key].cpu().numpy()[0]

    ground_truth = _first_item('user_summary')
    shot_bounds = _first_item('change_points')
    frame_count = _first_item('n_frames')
    picks = _first_item('positions')

    # generate_summary works on lists of videos; wrap the single video and
    # unwrap the single result.
    summaries = generate_summary([shot_bounds], [frame_scores],
                                 [frame_count], [picks])
    return evaluate_summary(summaries[0], ground_truth, 'avg')
@METRICS.register_module(
    group_key=default_group, module_name=Metrics.video_summarization_metric)
class VideoSummarizationMetric(Metric):
    """The metric for video summarization task.

    Collects (outputs, inputs) pairs through ``add`` and reports the mean of
    the per-pair f-scores computed by ``calculate_f_score``.
    """

    def __init__(self):
        # Parallel lists: inputs[i] belongs to outputs[i].
        self.inputs = []
        self.outputs = []

    def add(self, outputs: Dict, inputs: Dict):
        """Store one (outputs, inputs) pair for later evaluation."""
        self.inputs.append(inputs)
        self.outputs.append(outputs)

    def evaluate(self):
        """Return the mean f-score over all stored pairs.

        :return: A dict mapping ``MetricKeys.FScore`` to the mean f-score.
        """
        per_sample_scores = []
        for prediction, sample in zip(self.outputs, self.inputs):
            per_sample_scores.append(calculate_f_score(prediction, sample))
        mean_score = sum(per_sample_scores) / len(per_sample_scores)
        return {MetricKeys.FScore: mean_score}

    def merge(self, other: 'VideoSummarizationMetric'):
        """Append the pairs stored in ``other`` to this metric."""
        self.inputs += other.inputs
        self.outputs += other.outputs

    def __getstate__(self):
        # State is the (inputs, outputs) tuple; __setstate__ unpacks it in
        # the same order.
        return (self.inputs, self.outputs)

    def __setstate__(self, state):
        restored_inputs, restored_outputs = state
        self.inputs = restored_inputs
        self.outputs = restored_outputs
|