智能体结构化输出功能详细介绍
# 安装最新版本
pip install zai-sdk
# 或指定版本
pip install zai-sdk==0.0.1
import zai
print(zai.__version__)
from zai import ZhipuAiClient
client = ZhipuAiClient(api_key="your-api-key")
# 基础JSON模式
response = client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "user",
"content": "请分析这句话的情感:'今天天气真好,心情很愉快!'"
}
],
response_format={
"type": "json_object"
}
)
import json
result = json.loads(response.choices[0].message.content)
print(result)
数据提取和结构化完整实现
class DataExtractor:
def __init__(self, api_key):
self.client = ZhipuAiClient(api_key=api_key)
def extract_contact_info(self, text):
"""提取联系信息"""
schema = {
"type": "object",
"properties": {
"contacts": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"phone": {"type": "string"},
"email": {"type": "string"},
"company": {"type": "string"},
"position": {"type": "string"},
"address": {"type": "string"}
},
"required": ["name"]
}
},
"total_count": {"type": "integer"},
"extraction_confidence": {"type": "number"}
},
"required": ["contacts", "total_count"]
}
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
你是一个信息提取专家。请从文本中提取所有联系信息,
按照以下JSON格式返回:
{json.dumps(schema, indent=2, ensure_ascii=False)}
注意:
- 如果某个字段没有信息,不要包含该字段
- phone字段应该是标准化的电话号码格式
- email字段应该是有效的邮箱地址
- extraction_confidence表示提取的整体置信度(0-1)
"""
},
{
"role": "user",
"content": f"请从以下文本中提取联系信息:\n\n{text}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)["properties"]
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"提取失败: {e}")
return None
def extract_product_info(self, product_description):
"""提取产品信息"""
schema = {
"type": "object",
"properties": {
"product_name": {"type": "string"},
"brand": {"type": "string"},
"category": {"type": "string"},
"price": {
"type": "object",
"properties": {
"amount": {"type": "number"},
"currency": {"type": "string"},
"original_price": {"type": "number"},
"discount": {"type": "number"}
}
},
"specifications": {
"type": "object",
"additionalProperties": True
},
"features": {
"type": "array",
"items": {"type": "string"}
},
"availability": {
"type": "object",
"properties": {
"in_stock": {"type": "boolean"},
"quantity": {"type": "integer"},
"shipping_time": {"type": "string"}
}
},
"ratings": {
"type": "object",
"properties": {
"average_rating": {"type": "number"},
"total_reviews": {"type": "integer"}
}
}
},
"required": ["product_name"]
}
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请从产品描述中提取结构化信息,按照以下格式返回:
{json.dumps(schema, indent=2, ensure_ascii=False)}
注意:
- 价格信息要准确提取数值和货币单位
- specifications中包含所有技术规格
- features列出主要功能特点
- 如果信息不明确,不要猜测
"""
},
{
"role": "user",
"content": f"产品描述:\n{product_description}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"产品信息提取失败: {e}")
return None
def extract_event_info(self, event_text):
"""提取事件信息"""
schema = {
"type": "object",
"properties": {
"events": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"description": {"type": "string"},
"start_time": {"type": "string"},
"end_time": {"type": "string"},
"location": {"type": "string"},
"organizer": {"type": "string"},
"participants": {
"type": "array",
"items": {"type": "string"}
},
"category": {"type": "string"},
"priority": {
"type": "string",
"enum": ["high", "medium", "low"]
},
"status": {
"type": "string",
"enum": ["scheduled", "ongoing", "completed", "cancelled"]
}
},
"required": ["title", "start_time"]
}
}
},
"required": ["events"]
}
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请从文本中提取所有事件信息,按照以下格式返回:
{json.dumps(schema, indent=2, ensure_ascii=False)}
时间格式要求:
- 使用ISO 8601格式:YYYY-MM-DDTHH:MM:SS
- 如果只有日期,使用:YYYY-MM-DD
- 如果时间不明确,尽量推断合理的时间
"""
},
{
"role": "user",
"content": f"请提取以下文本中的事件信息:\n\n{event_text}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"事件信息提取失败: {e}")
return None
# 使用示例
extractor = DataExtractor("your_api_key")
# 提取联系信息
contact_text = """
张三,手机:13800138000,邮箱:zhangsan@example.com,
在北京科技有限公司担任技术总监。
公司地址:北京市朝阳区科技园区123号。
李四,电话:010-12345678,工作邮箱:lisi@company.com,
是上海创新公司的产品经理。
"""
contacts = extractor.extract_contact_info(contact_text)
if contacts:
print(f"提取到 {contacts['total_count']} 个联系人")
for contact in contacts['contacts']:
print(f"姓名: {contact['name']}")
if 'phone' in contact:
print(f"电话: {contact['phone']}")
API响应格式化完整实现
class APIResponseFormatter:
def __init__(self, api_key):
self.client = ZhipuAiClient(api_key=api_key)
def format_search_results(self, query, raw_results):
"""格式化搜索结果"""
schema = {
"type": "object",
"properties": {
"query": {"type": "string"},
"total_results": {"type": "integer"},
"results": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"url": {"type": "string"},
"snippet": {"type": "string"},
"relevance_score": {"type": "number"},
"source_type": {"type": "string"},
"publish_date": {"type": "string"},
"tags": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["title", "url", "snippet"]
}
},
"suggestions": {
"type": "array",
"items": {"type": "string"}
},
"filters": {
"type": "object",
"properties": {
"date_range": {"type": "string"},
"source_types": {
"type": "array",
"items": {"type": "string"}
},
"languages": {
"type": "array",
"items": {"type": "string"}
}
}
}
},
"required": ["query", "total_results", "results"]
}
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请将搜索结果格式化为标准的JSON格式:
{json.dumps(schema, indent=2, ensure_ascii=False)}
要求:
- 计算每个结果的相关性评分(0-1)
- 识别内容类型(article, video, image, document等)
- 提取发布日期(如果有)
- 生成相关标签
- 提供搜索建议
"""
},
{
"role": "user",
"content": f"查询: {query}\n\n原始结果:\n{raw_results}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"格式化失败: {e}")
return None
def format_analytics_data(self, raw_data, metrics):
"""格式化分析数据"""
schema = {
"type": "object",
"properties": {
"summary": {
"type": "object",
"properties": {
"total_records": {"type": "integer"},
"date_range": {
"type": "object",
"properties": {
"start_date": {"type": "string"},
"end_date": {"type": "string"}
}
},
"key_insights": {
"type": "array",
"items": {"type": "string"}
}
}
},
"metrics": {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {
"current_value": {"type": "number"},
"previous_value": {"type": "number"},
"change_percentage": {"type": "number"},
"trend": {
"type": "string",
"enum": ["up", "down", "stable"]
},
"unit": {"type": "string"}
}
}
},
"time_series": {
"type": "array",
"items": {
"type": "object",
"properties": {
"timestamp": {"type": "string"},
"values": {
"type": "object",
"additionalProperties": {"type": "number"}
}
}
}
},
"segments": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"value": {"type": "number"},
"percentage": {"type": "number"},
"color": {"type": "string"}
}
}
}
},
"required": ["summary", "metrics"]
}
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请将分析数据格式化为标准格式:
{json.dumps(schema, indent=2, ensure_ascii=False)}
关注指标:{', '.join(metrics)}
要求:
- 计算变化百分比和趋势
- 提供关键洞察
- 时间序列数据按时间排序
- 分段数据包含百分比
"""
},
{
"role": "user",
"content": f"原始数据:\n{raw_data}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"分析数据格式化失败: {e}")
return None
# 使用示例
formatter = APIResponseFormatter("your_api_key")
# 格式化搜索结果
raw_search = """
1. Python编程入门教程 - https://example.com/python-tutorial
详细介绍Python基础语法和编程概念...
2. Python数据分析实战 - https://example.com/python-data
使用pandas和numpy进行数据处理...
"""
formatted_results = formatter.format_search_results("Python教程", raw_search)
if formatted_results:
print(f"找到 {formatted_results['total_results']} 个结果")
for result in formatted_results['results']:
print(f"标题: {result['title']}")
print(f"相关性: {result['relevance_score']}")
配置管理和验证完整实现
class ConfigurationManager:
def __init__(self, api_key):
self.client = ZhipuAiClient(api_key=api_key)
def parse_config_file(self, config_text, config_type="general"):
"""解析配置文件"""
schemas = {
"database": {
"type": "object",
"properties": {
"connections": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"host": {"type": "string"},
"port": {"type": "integer"},
"database": {"type": "string"},
"username": {"type": "string"},
"ssl": {"type": "boolean"},
"pool_size": {"type": "integer"}
},
"required": ["name", "host", "database"]
}
},
"settings": {
"type": "object",
"properties": {
"timeout": {"type": "integer"},
"retry_attempts": {"type": "integer"},
"log_level": {
"type": "string",
"enum": ["DEBUG", "INFO", "WARNING", "ERROR"]
}
}
}
},
"required": ["connections"]
},
"api": {
"type": "object",
"properties": {
"endpoints": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"url": {"type": "string"},
"method": {
"type": "string",
"enum": ["GET", "POST", "PUT", "DELETE"]
},
"headers": {"type": "object"},
"timeout": {"type": "integer"},
"rate_limit": {"type": "integer"}
},
"required": ["name", "url", "method"]
}
},
"authentication": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["bearer", "basic", "api_key"]
},
"credentials": {"type": "object"}
}
}
},
"required": ["endpoints"]
}
}
schema = schemas.get(config_type, {
"type": "object",
"additionalProperties": True
})
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请解析配置文件并转换为JSON格式:
{json.dumps(schema, indent=2, ensure_ascii=False)}
配置类型:{config_type}
要求:
- 识别配置项和值
- 转换数据类型(字符串、数字、布尔值)
- 处理数组和嵌套对象
- 验证必需字段
- 提供默认值(如适用)
"""
},
{
"role": "user",
"content": f"配置文件内容:\n{config_text}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=schema)
return result
except Exception as e:
print(f"配置解析失败: {e}")
return None
def validate_configuration(self, config_data, validation_rules):
"""验证配置"""
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请验证配置数据并返回验证结果:
返回格式:
{{
"is_valid": true/false,
"errors": [
{{
"field": "字段名",
"error": "错误描述",
"severity": "error/warning/info"
}}
],
"warnings": [
{{
"field": "字段名",
"message": "警告信息"
}}
],
"suggestions": [
"改进建议1",
"改进建议2"
]
}}
验证规则:{validation_rules}
"""
},
{
"role": "user",
"content": f"配置数据:\n{json.dumps(config_data, indent=2, ensure_ascii=False)}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
return result
except Exception as e:
print(f"配置验证失败: {e}")
return None
# 使用示例
config_manager = ConfigurationManager("your_api_key")
# 解析数据库配置
db_config_text = """
[database]
host = localhost
port = 5432
database = myapp
username = admin
ssl = true
pool_size = 10
[settings]
timeout = 30
retry_attempts = 3
log_level = INFO
"""
config = config_manager.parse_config_file(db_config_text, "database")
if config:
print("解析的配置:", json.dumps(config, indent=2, ensure_ascii=False))
# 验证配置
validation_rules = [
"端口号必须在1-65535范围内",
"数据库名不能为空",
"连接池大小应该大于0",
"超时时间应该合理(1-300秒)"
]
validation_result = config_manager.validate_configuration(config, validation_rules)
if validation_result:
print(f"配置有效性: {validation_result['is_valid']}")
if validation_result['errors']:
print("错误:", validation_result['errors'])
if validation_result['warnings']:
print("警告:", validation_result['warnings'])
动态Sechema生成完整实现
class DynamicSchemaGenerator:
def __init__(self, api_key):
self.client = ZhipuAiClient(api_key=api_key)
def generate_schema_from_example(self, example_data, schema_name="generated_schema"):
"""从示例数据生成Schema"""
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": """
请根据示例数据生成JSON Schema。
返回格式:
{
"schema_name": "schema名称",
"schema": {
// 完整的JSON Schema定义
},
"description": "Schema描述",
"examples": [
// 符合Schema的示例数据
]
}
要求:
- 分析数据类型和结构
- 识别必需字段和可选字段
- 添加适当的验证规则
- 包含字段描述
- 处理数组和嵌套对象
"""
},
{
"role": "user",
"content": f"Schema名称: {schema_name}\n\n示例数据:\n{json.dumps(example_data, indent=2, ensure_ascii=False)}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
return result
except Exception as e:
print(f"Schema生成失败: {e}")
return None
def merge_schemas(self, schemas, merged_name="merged_schema"):
"""合并多个Schema"""
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": """
请合并多个JSON Schema为一个统一的Schema。
返回格式:
{
"merged_schema": {
// 合并后的Schema
},
"conflicts": [
{
"field": "字段名",
"conflict_type": "类型冲突/必需性冲突等",
"resolution": "解决方案"
}
],
"notes": "合并说明"
}
合并规则:
- 相同字段以最宽松的类型为准
- 可选字段优先于必需字段
- 保留所有可能的枚举值
- 记录冲突和解决方案
"""
},
{
"role": "user",
"content": f"要合并的Schema:\n{json.dumps(schemas, indent=2, ensure_ascii=False)}"
}
],
response_format={"type": "json_object"}
)
try:
result = json.loads(response.choices[0].message.content)
return result
except Exception as e:
print(f"Schema合并失败: {e}")
return None
# 使用示例
schema_gen = DynamicSchemaGenerator("your_api_key")
# 从示例生成Schema
example_user_data = {
"id": 12345,
"name": "张三",
"email": "zhangsan@example.com",
"age": 30,
"is_active": True,
"tags": ["developer", "python"],
"profile": {
"bio": "Python开发者",
"location": "北京",
"website": "https://example.com"
}
}
generated_schema = schema_gen.generate_schema_from_example(example_user_data, "user_profile")
if generated_schema:
print("生成的Schema:", json.dumps(generated_schema['schema'], indent=2, ensure_ascii=False))
错误处理和重试机制完整实现
import time
import random
from functools import wraps
def retry_on_failure(max_retries=3, delay=1, backoff=2):
"""重试装饰器"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
retries = 0
while retries < max_retries:
try:
return func(*args, **kwargs)
except Exception as e:
retries += 1
if retries >= max_retries:
raise e
wait_time = delay * (backoff ** (retries - 1))
wait_time += random.uniform(0, 1) # 添加随机抖动
print(f"重试 {retries}/{max_retries},等待 {wait_time:.2f} 秒...")
time.sleep(wait_time)
return None
return wrapper
return decorator
class RobustJSONProcessor:
def __init__(self, api_key):
self.client = ZhipuAiClient(api_key=api_key)
@retry_on_failure(max_retries=3)
def process_with_fallback(self, prompt, primary_schema, fallback_schema=None):
"""带降级策略的JSON处理"""
try:
# 尝试主要Schema
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请按照以下JSON格式返回结果:
{json.dumps(primary_schema, indent=2, ensure_ascii=False)}
"""
},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=primary_schema)
return {"success": True, "data": result, "schema_used": "primary"}
except Exception as e:
print(f"主要Schema失败: {e}")
if fallback_schema:
try:
# 尝试降级Schema
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": f"""
请按照以下简化的JSON格式返回结果:
{json.dumps(fallback_schema, indent=2, ensure_ascii=False)}
"""
},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
validate(instance=result, schema=fallback_schema)
return {"success": True, "data": result, "schema_used": "fallback"}
except Exception as fallback_error:
print(f"降级Schema也失败: {fallback_error}")
# 最后的降级:返回基本文本
try:
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[
{
"role": "system",
"content": "请返回一个简单的JSON对象,包含result字段。"
},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
return {"success": True, "data": result, "schema_used": "minimal"}
except Exception as final_error:
return {"success": False, "error": str(final_error)}
def validate_and_fix(self, data, schema, max_fix_attempts=2):
"""验证并尝试修复JSON数据"""
try:
validate(instance=data, schema=schema)
return {"valid": True, "data": data, "fixes_applied": []}
except jsonschema.exceptions.ValidationError as e:
print(f"验证失败: {e.message}")
# 尝试自动修复
fixes_applied = []
fixed_data = data.copy()
for attempt in range(max_fix_attempts):
try:
# 使用AI修复数据
fix_prompt = f"""
以下JSON数据不符合Schema要求:
数据:{json.dumps(fixed_data, ensure_ascii=False)}
Schema:{json.dumps(schema, ensure_ascii=False)}
错误:{e.message}
请修复数据使其符合Schema要求,返回修复后的JSON:
"""
response = self.client.chat.completions.create(
model="glm-4-plus",
messages=[{"role": "user", "content": fix_prompt}],
response_format={"type": "json_object"}
)
fixed_data = json.loads(response.choices[0].message.content)
validate(instance=fixed_data, schema=schema)
fixes_applied.append(f"尝试 {attempt + 1}: 修复成功")
return {"valid": True, "data": fixed_data, "fixes_applied": fixes_applied}
except Exception as fix_error:
fixes_applied.append(f"尝试 {attempt + 1}: {str(fix_error)}")
continue
return {"valid": False, "data": data, "fixes_applied": fixes_applied}
# 使用示例
processor = RobustJSONProcessor("your_api_key")
# 定义主要和降级Schema
primary_schema = {
"type": "object",
"properties": {
"analysis": {"type": "string"},
"sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
"confidence": {"type": "number", "minimum": 0, "maximum": 1},
"keywords": {"type": "array", "items": {"type": "string"}},
"entities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"text": {"type": "string"},
"type": {"type": "string"},
"confidence": {"type": "number"}
}
}
}
},
"required": ["analysis", "sentiment", "confidence"]
}
fallback_schema = {
"type": "object",
"properties": {
"analysis": {"type": "string"},
"sentiment": {"type": "string"}
},
"required": ["analysis", "sentiment"]
}
# 处理文本
result = processor.process_with_fallback(
"今天天气真好,心情很愉快!",
primary_schema,
fallback_schema
)
if result["success"]:
print(f"处理成功,使用了 {result['schema_used']} Schema")
print("结果:", json.dumps(result["data"], indent=2, ensure_ascii=False))
else:
print(f"处理失败: {result['error']}")