Skip to content

Commit 51a7534

Browse files
support llama parse audio (#859)
1 parent 4f5d2bd commit 51a7534

File tree

5 files changed

+18
-3
lines changed

5 files changed

+18
-3
lines changed

py/llama_cloud_services/parse/types.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class ChartItem(BaseModel):
 class Page(BaseModel):
     """A page of the document."""
 
-    page: int = Field(description="The page number.")
+    page: int = Field(default=0, description="The page number.")
     text: Optional[str] = Field(default=None, description="The text of the page.")
     md: Optional[str] = Field(default=None, description="The markdown of the page.")
     images: List[ImageItem] = Field(
@@ -153,6 +153,12 @@ class Page(BaseModel):
     noTextContent: bool = Field(
         default=False, description="Whether the page has no text content."
     )
+    isAudioTranscript: bool = Field(
+        default=False, description="Whether the page is an audio transcript."
+    )
+    durationInSeconds: Optional[float] = Field(
+        default=None, description="The duration of the audio transcript in seconds."
+    )
 
 
 class JobResult(BaseModel):

py/llama_parse/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dev = [
 
 [project]
 name = "llama-parse"
-version = "0.6.56"
+version = "0.6.57"
 description = "Parse files into RAG-Optimized formats."
 authors = [{name = "Logan Markewich", email = "[email protected]"}]
 requires-python = ">=3.9,<4.0"

py/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dev = [
 
 [project]
 name = "llama-cloud-services"
-version = "0.6.56"
+version = "0.6.57"
 description = "Tailored SDK clients for LlamaCloud services."
 authors = [{name = "Logan Markewich", email = "[email protected]"}]
 requires-python = ">=3.9,<4.0"

py/tests/parse/test_llama_parse.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,12 @@ async def test_get_result(markdown_parser: LlamaParse) -> None:
     result = await markdown_parser.aget_result(expected.job_id)
     assert result.job_id == expected.job_id
     assert len(result.pages) == len(expected.pages)
+
+
+@pytest.mark.asyncio
+async def test_parse_audio() -> None:
+    parser = LlamaParse()
+    filepath = "tests/test_files/hello_world.m4a"
+
+    result = await parser.aparse(filepath)
+    assert result.job_id is not None

py/tests/test_files/hello_world.m4a

9.28 KB
Binary file not shown.

0 commit comments

Comments
 (0)