ClickHouse synchronizes data between replicas through ZooKeeper, so first deploy a ZooKeeper cluster; for the procedure, see: Deploying ZooKeeper 3.8.0 with Docker.
Once ZooKeeper is up, pull the ClickHouse image:
docker pull clickhouse/clickhouse-server:23.1.2.9
Next, create the configuration file:
<clickhouse>
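<!-- Delay (in seconds) before a dropped table is actually deleted; 0 removes it immediately -->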
<database_atomic_delay_before_drop_table_sec>0</database_atomic_delay_before_drop_table_sec>
<logger>
<!--
Log level. From low to high: fatal, critical, error, warning, notice, information, debug, trace, test.
Set it to none to disable logging entirely.
-->
<level>trace</level>
<!-- Path of the main log file -->
<log>/var/lib/clickhouse/log/clickhouse-server.log</log>
<!-- Path of the error log file -->
<errorlog>/var/lib/clickhouse/log/clickhouse-server.err.log</errorlog>
<!-- Log rotation policy, see https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85 -->
<size>1000M</size>
<count>10</count>
</logger>
<!-- Extra HTTP response headers; not set by default -->
<!-- http_options_response>
<header>
<name>Access-Control-Allow-Origin</name>
<value>*</value>
</header>
<header>
<name>Access-Control-Allow-Headers</name>
<value>origin, x-requested-with</value>
</header>
<header>
<name>Access-Control-Allow-Methods</name>
<value>POST, GET, OPTIONS</value>
</header>
<header>
<name>Access-Control-Max-Age</name>
<value>86400</value>
</header>
</http_options_response -->
<!-- HTTP listening port of ClickHouse -->
<http_port>8123</http_port>
<!-- Native TCP listening port of ClickHouse -->
<tcp_port>9000</tcp_port>
<!-- MySQL compatibility port: MySQL drivers and clients can connect and run queries through it. -->
<mysql_port>3306</mysql_port>
<!-- PostgreSQL compatibility port: PostgreSQL drivers and clients can connect and run queries through it. -->
<postgresql_port>5432</postgresql_port>
<!-- HTTP port for node-to-node (interserver) communication within the replica cluster -->
<interserver_http_port>9009</interserver_http_port>
<!-- Maximum number of connections -->
<max_connections>4096</max_connections>
<!-- For 'Connection: keep-alive' in HTTP 1.1 -->
<keep_alive_timeout>3</keep_alive_timeout>
<!-- gRPC port -->
<!-- <grpc_port>9100</grpc_port> -->
<!-- gRPC settings -->
<grpc>
<!-- gRPC SSL and certificate settings -->
<enable_ssl>false</enable_ssl>
<ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
<ssl_key_file>/path/to/ssl_key_file</ssl_key_file>
<ssl_require_client_auth>false</ssl_require_client_auth>
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
<transport_compression_type>none</transport_compression_type>
<transport_compression_level>0</transport_compression_level>
<max_send_message_size>-1</max_send_message_size>
<max_receive_message_size>-1</max_receive_message_size>
<verbose_logs>false</verbose_logs>
</grpc>
<openSSL>
<server>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
<client>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<concurrent_threads_soft_limit_num>0</concurrent_threads_soft_limit_num>
<concurrent_threads_soft_limit_ratio_to_cores>0</concurrent_threads_soft_limit_ratio_to_cores>
<max_concurrent_queries>100</max_concurrent_queries>
<!-- Maximum server memory usage. By default it is derived from max_server_memory_usage_to_ram_ratio; a value larger than that cap is still limited by max_server_memory_usage_to_ram_ratio. -->
<max_server_memory_usage>0</max_server_memory_usage>
<!-- Maximum number of threads in the global thread pool. -->
<max_thread_pool_size>10000</max_thread_pool_size>
<!-- Cap on server memory usage as a fraction of physical RAM -->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mmap_cache_size>1000</mmap_cache_size>
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<!-- Data storage path; must end with a trailing slash -->
<path>/var/lib/clickhouse/disk/data/</path>
<!-- Path for temporary data -->
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<allow_plaintext_password>1</allow_plaintext_password>
<allow_no_password>1</allow_no_password>
<allow_implicit_no_password>1</allow_implicit_no_password>
<!-- Directory for user files, accessible via the File table engine -->
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<!-- Storage of users, roles, access rights, settings profiles, and quotas -->
<user_directories>
<users_xml>
<path>users.xml</path>
</users_xml>
<local_directory>
<path>/var/lib/clickhouse/access/</path>
</local_directory>
</user_directories>
<access_control_improvements>
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
<on_cluster_queries_require_cluster_grant>false</on_cluster_queries_require_cluster_grant>
<select_from_system_db_requires_grant>false</select_from_system_db_requires_grant>
<select_from_information_schema_requires_grant>false</select_from_information_schema_requires_grant>
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
</access_control_improvements>
<!-- Default settings profile. -->
<default_profile>default</default_profile>
<!-- Prefixes for user-defined custom settings -->
<custom_settings_prefixes></custom_settings_prefixes>
<!-- Default database -->
<default_database>default</default_database>
<mlock_executable>true</mlock_executable>
<remap_executable>false</remap_executable>
<!-- ZooKeeper settings; change the hosts below to the addresses of the ZooKeeper cluster deployed above -->
<zookeeper>
<node>
<host>zkhost1</host>
<port>2181</port>
</node>
<node>
<host>zkhost2</host>
<port>2181</port>
</node>
<node>
<host>zkhost3</host>
<port>2181</port>
</node>
</zookeeper>
<!-- Macro substitutions for replicated tables; each replica must use a distinct replica value -->
<macros>
<shard>01</shard>
<replica>replica-1</replica>
</macros>
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<max_session_timeout>3600</max_session_timeout>
<default_session_timeout>60</default_session_timeout>
<!-- Settings for exposing metrics to Prometheus -->
<!--
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<status_info>true</status_info>
</prometheus>
-->
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
<query_views_log>
<database>system</database>
<table>query_views_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_views_log>
<part_log>
<database>system</database>
<table>part_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<flush_interval_milliseconds>7000</flush_interval_milliseconds>
</asynchronous_metric_log>
<opentelemetry_span_log>
<engine>
engine MergeTree
partition by toYYYYMM(finish_date)
order by (finish_date, finish_time_us, trace_id)
</engine>
<database>system</database>
<table>opentelemetry_span_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</opentelemetry_span_log>
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by />
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<processors_profile_log>
<database>system</database>
<table>processors_profile_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</processors_profile_log>
<asynchronous_insert_log>
<database>system</database>
<table>asynchronous_insert_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<partition_by>event_date</partition_by>
<ttl>event_date + INTERVAL 3 DAY</ttl>
</asynchronous_insert_log>
<top_level_domains_lists>
</top_level_domains_lists>
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
<encryption_codecs>
</encryption_codecs>
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<format_schema_path>/var/lib/clickhouse/disk/format_schemas/</format_schema_path>
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->
<!-- the ClickHouse core developers team via Sentry https://sentry.io -->
<!-- Doing so at least in pre-production environments is highly appreciated -->
<enabled>false</enabled>
<!-- Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report -->
<anonymize>false</anonymize>
<!-- Default endpoint should be changed to different Sentry DSN only if you have -->
<!-- some in-house engineers or hired consultants who're going to debug ClickHouse issues for you -->
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
</clickhouse>
Then start as many containers as you need replica nodes, each with its own copy of the config file in which the <replica> macro value is unique.
Things to note at startup:
- On an internal network it is usually enough to expose ports 8123, 9000, and 9009; expose 3306 and 5432 only if needed.
- In the container, the configuration file must be mounted as /etc/clickhouse-server/config.xml.
- With the configuration above, the data must be mounted under the /var/lib/clickhouse/disk directory, as in the sketch after this list.
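A minimal startup sketch under those notes (the container name clickhouse-1, the host paths under /data, and the ch-net Docker network are assumptions; zkhost1/zkhost2/zkhost3 from the config must be resolvable from the container, e.g. by attaching it to the same Docker network as the ZooKeeper containers):

# Start one replica node; repeat with another name and a config whose <replica> macro differs.
docker run -d --name clickhouse-1 \
  --network ch-net \
  -p 8123:8123 -p 9000:9000 -p 9009:9009 \
  -v /data/clickhouse-1/config.xml:/etc/clickhouse-server/config.xml \
  -v /data/clickhouse-1/disk:/var/lib/clickhouse/disk \
  clickhouse/clickhouse-server:23.1.2.9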
Once the nodes are up, create the same database on each node, and inside that database create ReplicatedReplacingMergeTree tables with the same table name; the tables then replicate automatically.
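For example, a sketch of the DDL (the demo database, the events table, and its columns are made up for illustration; run the same statements on every node):

# Create the database on each replica:
docker exec -it clickhouse-1 clickhouse-client --query "CREATE DATABASE IF NOT EXISTS demo"

# Create the replicated table on each replica. {shard} and {replica} are expanded from <macros>;
# with default_replica_path / default_replica_name set as above, both engine arguments may be omitted.
docker exec -it clickhouse-1 clickhouse-client --query "
  CREATE TABLE IF NOT EXISTS demo.events
  (
      id         UInt64,
      updated_at DateTime
  )
  ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}')
  ORDER BY id"

After inserting rows on one node they show up on the others; SELECT count() FROM demo.events on a second replica is a quick way to verify.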