# _observability.py (10 KB)
# Copyright 2023 The gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. from __future__ import annotations
  15. import abc
  16. import contextlib
  17. import logging
  18. import threading
  19. from typing import (
  20. Any,
  21. Generator,
  22. Generic,
  23. List,
  24. Optional,
  25. Tuple,
  26. TypeVar,
  27. Union,
  28. )
  29. from grpc._cython import cygrpc as _cygrpc
  30. from grpc._typing import ChannelArgumentType
  31. _LOGGER = logging.getLogger(__name__)
  32. _channel = Any # _channel.py imports this module.
  33. ClientCallTracerCapsule = TypeVar("ClientCallTracerCapsule")
  34. ServerCallTracerFactoryCapsule = TypeVar("ServerCallTracerFactoryCapsule")
  35. _plugin_lock: threading.RLock = threading.RLock()
  36. _OBSERVABILITY_PLUGIN: Optional["ObservabilityPlugin"] = None
  37. _SERVICES_TO_EXCLUDE: List[bytes] = [
  38. b"google.monitoring.v3.MetricService",
  39. b"google.devtools.cloudtrace.v2.TraceService",
  40. ]
  41. class ServerCallTracerFactory:
  42. """An encapsulation of a ServerCallTracerFactory.
  43. Instances of this class can be passed to a Channel as values for the
  44. grpc.experimental.server_call_tracer_factory option
  45. """
  46. def __init__(self, address):
  47. self._address = address
  48. def __int__(self):
  49. return self._address
  50. class ObservabilityPlugin(
  51. Generic[ClientCallTracerCapsule, ServerCallTracerFactoryCapsule],
  52. metaclass=abc.ABCMeta,
  53. ):
  54. """Abstract base class for observability plugin.
  55. *This is a semi-private class that was intended for the exclusive use of
  56. the gRPC team.*
  57. The ClientCallTracerCapsule and ClientCallTracerCapsule created by this
  58. plugin should be injected to gRPC core using observability_init at the
  59. start of a program, before any channels/servers are built.
  60. Any future methods added to this interface cannot have the
  61. @abc.abstractmethod annotation.
  62. Attributes:
  63. _stats_enabled: A bool indicates whether tracing is enabled.
  64. _tracing_enabled: A bool indicates whether stats(metrics) is enabled.
  65. _registered_methods: A set which stores the registered method names in
  66. bytes.
  67. """
  68. _tracing_enabled: bool = False
  69. _stats_enabled: bool = False
  70. @abc.abstractmethod
  71. def create_client_call_tracer(
  72. self, method_name: bytes, target: bytes
  73. ) -> ClientCallTracerCapsule:
  74. """Creates a ClientCallTracerCapsule.
  75. After register the plugin, if tracing or stats is enabled, this method
  76. will be called after a call was created, the ClientCallTracer created
  77. by this method will be saved to call context.
  78. The ClientCallTracer is an object which implements `grpc_core::ClientCallTracer`
  79. interface and wrapped in a PyCapsule using `client_call_tracer` as name.
  80. Args:
  81. method_name: The method name of the call in byte format.
  82. target: The channel target of the call in byte format.
  83. registered_method: Whether this method is pre-registered.
  84. Returns:
  85. A PyCapsule which stores a ClientCallTracer object.
  86. """
  87. raise NotImplementedError()
  88. @abc.abstractmethod
  89. def save_trace_context(
  90. self, trace_id: str, span_id: str, is_sampled: bool
  91. ) -> None:
  92. """Saves the trace_id and span_id related to the current span.
  93. After register the plugin, if tracing is enabled, this method will be
  94. called after the server finished sending response.
  95. This method can be used to propagate census context.
  96. Args:
  97. trace_id: The identifier for the trace associated with the span as a
  98. 32-character hexadecimal encoded string,
  99. e.g. 26ed0036f2eff2b7317bccce3e28d01f
  100. span_id: The identifier for the span as a 16-character hexadecimal encoded
  101. string. e.g. 113ec879e62583bc
  102. is_sampled: A bool indicates whether the span is sampled.
  103. """
  104. raise NotImplementedError()
  105. @abc.abstractmethod
  106. def create_server_call_tracer_factory(
  107. self,
  108. *,
  109. xds: bool = False,
  110. ) -> Optional[ServerCallTracerFactoryCapsule]:
  111. """Creates a ServerCallTracerFactoryCapsule.
  112. This method will be called at server initialization time to create a
  113. ServerCallTracerFactory, which will be registered to gRPC core.
  114. The ServerCallTracerFactory is an object which implements
  115. `grpc_core::ServerCallTracerFactory` interface and wrapped in a PyCapsule
  116. using `server_call_tracer_factory` as name.
  117. Args:
  118. xds: Whether the server is xds server.
  119. Returns:
  120. A PyCapsule which stores a ServerCallTracerFactory object. Or None if
  121. plugin decides not to create ServerCallTracerFactory.
  122. """
  123. raise NotImplementedError()
  124. @abc.abstractmethod
  125. def record_rpc_latency(
  126. self, method: str, target: str, rpc_latency: float, status_code: Any
  127. ) -> None:
  128. """Record the latency of the RPC.
  129. After register the plugin, if stats is enabled, this method will be
  130. called at the end of each RPC.
  131. Args:
  132. method: The fully-qualified name of the RPC method being invoked.
  133. target: The target name of the RPC method being invoked.
  134. rpc_latency: The latency for the RPC in seconds, equals to the time between
  135. when the client invokes the RPC and when the client receives the status.
  136. status_code: An element of grpc.StatusCode in string format representing the
  137. final status for the RPC.
  138. """
  139. raise NotImplementedError()
  140. def set_tracing(self, enable: bool) -> None:
  141. """Enable or disable tracing.
  142. Args:
  143. enable: A bool indicates whether tracing should be enabled.
  144. """
  145. self._tracing_enabled = enable
  146. def set_stats(self, enable: bool) -> None:
  147. """Enable or disable stats(metrics).
  148. Args:
  149. enable: A bool indicates whether stats should be enabled.
  150. """
  151. self._stats_enabled = enable
  152. def save_registered_method(self, method_name: bytes) -> None:
  153. """Saves the method name to registered_method list.
  154. When exporting metrics, method name for unregistered methods will be replaced
  155. with 'other' by default.
  156. Args:
  157. method_name: The method name in bytes.
  158. """
  159. raise NotImplementedError()
  160. @property
  161. def tracing_enabled(self) -> bool:
  162. return self._tracing_enabled
  163. @property
  164. def stats_enabled(self) -> bool:
  165. return self._stats_enabled
  166. @property
  167. def observability_enabled(self) -> bool:
  168. return self.tracing_enabled or self.stats_enabled
  169. @contextlib.contextmanager
  170. def get_plugin() -> Generator[Optional[ObservabilityPlugin], None, None]:
  171. """Get the ObservabilityPlugin in _observability module.
  172. Returns:
  173. The ObservabilityPlugin currently registered with the _observability
  174. module. Or None if no plugin exists at the time of calling this method.
  175. """
  176. with _plugin_lock:
  177. yield _OBSERVABILITY_PLUGIN
  178. def set_plugin(observability_plugin: Optional[ObservabilityPlugin]) -> None:
  179. """Save ObservabilityPlugin to _observability module.
  180. Args:
  181. observability_plugin: The ObservabilityPlugin to save.
  182. Raises:
  183. ValueError: If an ObservabilityPlugin was already registered at the
  184. time of calling this method.
  185. """
  186. global _OBSERVABILITY_PLUGIN # pylint: disable=global-statement
  187. with _plugin_lock:
  188. if observability_plugin and _OBSERVABILITY_PLUGIN:
  189. raise ValueError("observability_plugin was already set!")
  190. _OBSERVABILITY_PLUGIN = observability_plugin
  191. def observability_init(observability_plugin: ObservabilityPlugin) -> None:
  192. """Initialize observability with provided ObservabilityPlugin.
  193. This method have to be called at the start of a program, before any
  194. channels/servers are built.
  195. Args:
  196. observability_plugin: The ObservabilityPlugin to use.
  197. Raises:
  198. ValueError: If an ObservabilityPlugin was already registered at the
  199. time of calling this method.
  200. """
  201. set_plugin(observability_plugin)
  202. def observability_deinit() -> None:
  203. """Clear the observability context, including ObservabilityPlugin and
  204. ServerCallTracerFactory
  205. This method have to be called after exit observability context so that
  206. it's possible to re-initialize again.
  207. """
  208. set_plugin(None)
  209. _cygrpc.clear_server_call_tracer_factory()
  210. def maybe_record_rpc_latency(state: "_channel._RPCState") -> None:
  211. """Record the latency of the RPC, if the plugin is registered and stats is enabled.
  212. This method will be called at the end of each RPC.
  213. Args:
  214. state: a grpc._channel._RPCState object which contains the stats related to the
  215. RPC.
  216. """
  217. # TODO(xuanwn): use channel args to exclude those metrics.
  218. for exclude_prefix in _SERVICES_TO_EXCLUDE:
  219. if exclude_prefix in state.method.encode("utf8"):
  220. return
  221. with get_plugin() as plugin:
  222. if plugin and plugin.stats_enabled:
  223. rpc_latency_s = state.rpc_end_time - state.rpc_start_time
  224. rpc_latency_ms = rpc_latency_s * 1000
  225. plugin.record_rpc_latency(
  226. state.method, state.target, rpc_latency_ms, state.code
  227. )
  228. def create_server_call_tracer_factory_option(
  229. xds: bool,
  230. ) -> Union[Tuple[ChannelArgumentType], Tuple[()]]:
  231. with get_plugin() as plugin:
  232. if plugin and plugin.stats_enabled:
  233. server_call_tracer_factory_address = (
  234. _cygrpc.get_server_call_tracer_factory_address(plugin, xds)
  235. )
  236. if server_call_tracer_factory_address:
  237. return (
  238. (
  239. "grpc.experimental.server_call_tracer_factory",
  240. ServerCallTracerFactory(
  241. server_call_tracer_factory_address
  242. ),
  243. ),
  244. )
  245. return ()