Skip to content

vllm.plugins.io_processors.interface

IOProcessorInput module-attribute

IOProcessorInput = TypeVar('IOProcessorInput')

IOProcessorOutput module-attribute

IOProcessorOutput = TypeVar('IOProcessorOutput')

IOProcessor

Bases: ABC, Generic[IOProcessorInput, IOProcessorOutput]

Source code in vllm/plugins/io_processors/interface.py
class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
    """Abstract interface for plugins that translate between a custom
    input/output format and vLLM's native prompt / pooling-output types.

    Subclasses must implement :meth:`pre_process` (custom input ->
    prompt(s)), :meth:`post_process` (pooling outputs -> custom output),
    :meth:`parse_request`, and :meth:`output_to_response`.
    """

    def __init__(self, vllm_config: VllmConfig):
        # Keep the engine configuration so subclass hooks can consult it
        # during pre-/post-processing.
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Convert a plugin-specific input into one or more vLLM prompts.

        Args:
            prompt: The plugin-specific input to convert.
            request_id: Optional identifier correlating this call with a
                request; may be ``None``.
            **kwargs: Implementation-defined extra options.

        Returns:
            A single prompt or a sequence of prompts.
        """
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Async wrapper over :meth:`pre_process`.

        The default implementation delegates to the synchronous method;
        override when pre-processing itself needs to await.
        """
        return self.pre_process(prompt, request_id, **kwargs)

    @abstractmethod
    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> IOProcessorOutput:
        """Convert pooling outputs into the plugin-specific output type."""
        raise NotImplementedError

    async def post_process_async(
        self,
        # Fix: ``AsyncGenerator`` requires an explicit send-type argument
        # on Python < 3.13; nothing is sent into the generator, so ``None``.
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput], None],
        request_id: Optional[str] = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Drain an async stream of ``(index, output)`` pairs, restore
        submission order, and delegate to :meth:`post_process`.

        vLLM does not guarantee outputs arrive in the order the prompts
        were submitted, so the pairs are sorted by index first.
        """
        indexed = [(i, item) async for i, item in model_output]
        indexed.sort(key=lambda pair: pair[0])
        return self.post_process([item for _, item in indexed], request_id,
                                 **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        """Validate and convert a raw request into the plugin input type."""
        raise NotImplementedError

    @abstractmethod
    def output_to_response(
            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
        """Wrap a plugin output object in an ``IOProcessorResponse``."""
        raise NotImplementedError

vllm_config instance-attribute

vllm_config = vllm_config

__init__

__init__(vllm_config: VllmConfig)
Source code in vllm/plugins/io_processors/interface.py
def __init__(self, vllm_config: VllmConfig):
    # Store the engine configuration for later use by subclass hooks.
    self.vllm_config = vllm_config

output_to_response abstractmethod

output_to_response(
    plugin_output: IOProcessorOutput,
) -> IOProcessorResponse
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def output_to_response(
        self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
    """Wrap a plugin output object in an ``IOProcessorResponse``."""
    raise NotImplementedError

parse_request abstractmethod

parse_request(request: Any) -> IOProcessorInput
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def parse_request(self, request: Any) -> IOProcessorInput:
    """Validate and convert a raw request into the plugin input type."""
    raise NotImplementedError

post_process abstractmethod

post_process(
    model_output: Sequence[PoolingRequestOutput],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def post_process(self,
                 model_output: Sequence[PoolingRequestOutput],
                 request_id: Optional[str] = None,
                 **kwargs) -> IOProcessorOutput:
    """Convert pooling outputs into the plugin-specific output type."""
    raise NotImplementedError

post_process_async async

post_process_async(
    model_output: AsyncGenerator[
        tuple[int, PoolingRequestOutput]
    ],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput
Source code in vllm/plugins/io_processors/interface.py
async def post_process_async(
    self,
    model_output: AsyncGenerator[tuple[int, PoolingRequestOutput], None],
    request_id: Optional[str] = None,
    **kwargs,
) -> IOProcessorOutput:
    """Drain an async stream of ``(index, output)`` pairs, restore
    submission order, and delegate to ``post_process``.
    """
    # We cannot guarantee outputs are returned in the same order they were
    # fed to vLLM.
    # Let's sort them by id before post_processing
    sorted_output = sorted([(i, item) async for i, item in model_output],
                           key=lambda output: output[0])
    collected_output = [output[1] for output in sorted_output]
    return self.post_process(collected_output, request_id, **kwargs)

pre_process abstractmethod

pre_process(
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]
Source code in vllm/plugins/io_processors/interface.py
@abstractmethod
def pre_process(
    self,
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
    """Convert a plugin-specific input into one or more vLLM prompts."""
    raise NotImplementedError

pre_process_async async

pre_process_async(
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]
Source code in vllm/plugins/io_processors/interface.py
async def pre_process_async(
    self,
    prompt: IOProcessorInput,
    request_id: Optional[str] = None,
    **kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
    """Async wrapper; the default implementation delegates to
    ``pre_process``. Override when pre-processing needs to await.
    """
    return self.pre_process(prompt, request_id, **kwargs)