Skip to content

vllm.plugins.io_processors.interface

IOProcessorInput module-attribute

IOProcessorInput = TypeVar('IOProcessorInput')

IOProcessorOutput module-attribute

IOProcessorOutput = TypeVar('IOProcessorOutput')

IOProcessor

Bases: ABC, Generic[IOProcessorInput, IOProcessorOutput]

Source code in vllm/plugins/io_processors/interface.py
class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
    """Abstract interface for plugins that translate between a custom
    input/output format and vLLM's native prompt / pooling-output types.

    Subclasses must implement :meth:`pre_process` (custom input ->
    prompt(s)), :meth:`post_process` (pooling outputs -> custom output),
    :meth:`parse_request`, and :meth:`output_to_response`.
    """

    def __init__(self, vllm_config: VllmConfig):
        # Keep the engine configuration so subclass hooks can consult it
        # during pre-/post-processing.
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Convert a plugin-specific input into one or more vLLM prompts.

        Args:
            prompt: The plugin-specific input to convert.
            request_id: Optional identifier correlating this call with a
                request; may be ``None``.
            **kwargs: Implementation-defined extra options.

        Returns:
            A single prompt or a sequence of prompts.
        """
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Async wrapper over :meth:`pre_process`.

        The default implementation delegates to the synchronous method;
        override when pre-processing itself needs to await.
        """
        return self.pre_process(prompt, request_id, **kwargs)

    @abstractmethod
    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> IOProcessorOutput:
        """Convert pooling outputs into the plugin-specific output type."""
        raise NotImplementedError

    async def post_process_async(
        self,
        # Fix: ``AsyncGenerator`` requires an explicit send-type argument
        # on Python < 3.13; nothing is sent into the generator, so ``None``.
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput], None],
        request_id: Optional[str] = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Drain an async stream of ``(index, output)`` pairs, restore
        submission order, and delegate to :meth:`post_process`.

        vLLM does not guarantee outputs arrive in the order the prompts
        were submitted, so the pairs are sorted by index first.
        """
        indexed = [(i, item) async for i, item in model_output]
        indexed.sort(key=lambda pair: pair[0])
        return self.post_process([item for _, item in indexed], request_id,
                                 **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        """Validate and convert a raw request into the plugin input type."""
        raise NotImplementedError

    @abstractmethod
    def output_to_response(
            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
        """Wrap a plugin output object in an ``IOProcessorResponse``."""
        raise NotImplementedError

vllm_config instance-attribute

vllm_config = vllm_config

__init__

__init__(vllm_config: VllmConfig)
Source code in vllm/plugins/io_processors/interface.py
def __init__(self, vllm_config: VllmConfig):
    # Store the engine configuration for later use by subclass hooks.
    self.vllm_config = vllm_config

output_to_response abstractmethod

output_to_response(
    plugin_output: IOProcessorOutput,
) -> IOProcessorResponse
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def output_to_response(
        self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
    """Wrap a plugin output object in an ``IOProcessorResponse``."""
    raise NotImplementedError

parse_request abstractmethod

parse_request(request: Any) -> IOProcessorInput
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def parse_request(self, request: Any) -> IOProcessorInput:
    """Validate and convert a raw request into the plugin input type."""
    raise NotImplementedError

post_process abstractmethod

post_process(
    model_output: Sequence[PoolingRequestOutput],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def post_process(self,
                 model_output: Sequence[PoolingRequestOutput],
                 request_id: Optional[str] = None,
                 **kwargs) -> IOProcessorOutput:
    """Convert pooling outputs into the plugin-specific output type."""
    raise NotImplementedError

post_process_async async

post_process_async(
    model_output: AsyncGenerator[
        tuple[int, PoolingRequestOutput]
    ],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput
Source code in vllm/plugins/io_processors/interface.py
async def post_process_async(
    self,
    model_output: AsyncGenerator[tuple[int, PoolingRequestOutput], None],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput:
    """Drain an async stream of ``(index, output)`` pairs, restore
    submission order, and delegate to ``post_process``.
    """
    # We cannot guarantee outputs are returned in the same order they were
    # fed to vLLM.
    # Let's sort them by id before post_processing
    sorted_output = sorted([(i, item) async for i, item in model_output],
                           key=lambda output: output[0])
    collected_output = [output[1] for output in sorted_output]
    return self.post_process(collected_output, request_id, **kwargs)

pre_process abstractmethod

pre_process(
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def pre_process(
    self,
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
    """Convert a plugin-specific input into one or more vLLM prompts."""
    raise NotImplementedError

pre_process_async async

pre_process_async(
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]
Source code in vllm/plugins/io_processors/interface.py
async def pre_process_async(
    self,
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
    """Async wrapper; the default implementation delegates to
    ``pre_process``. Override when pre-processing needs to await.
    """
    return self.pre_process(prompt, request_id, **kwargs)