voice_conversion_pipeline.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. from typing import Any, Dict
  3. import numpy as np
  4. import torch
  5. from modelscope.metainfo import Pipelines
  6. from modelscope.outputs import OutputKeys
  7. from modelscope.pipelines.base import Input, Pipeline
  8. from modelscope.pipelines.builder import PIPELINES
  9. from modelscope.utils.constant import Tasks
  10. @PIPELINES.register_module(
  11. Tasks.voice_conversion, module_name=Pipelines.voice_conversion)
  12. class VCPipeline(Pipeline):
  13. r"""ANS (Acoustic Noise Suppression) Inference Pipeline .
  14. When invoke the class with pipeline.__call__(), it accept only one
  15. parameter:
  16. inputs(str): the path of wav file
  17. """
  18. SAMPLE_RATE = 16000
  19. def __init__(self, model, **kwargs):
  20. """
  21. use `model` and `preprocessor` to create a kws pipeline for prediction
  22. Args:
  23. model: model id on modelscope hub.
  24. """
  25. super().__init__(model=model, **kwargs)
  26. self.model.eval()
  27. self.stream_mode = kwargs.get('stream_mode', False)
  28. def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
  29. return inputs
  30. def forward(self, inputs: Dict[str, Any],
  31. **forward_params) -> Dict[str, Any]:
  32. with torch.no_grad():
  33. outputs = self.model(inputs)
  34. outputs *= 32768.
  35. outputs = np.array(outputs, 'int16').tobytes()
  36. return {OutputKeys.OUTPUT_PCM: outputs}
  37. def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
  38. return inputs